Code Example #1
def test_csv_parser_with_blank_file():

    global test_file

    # blank file: no header row and no data rows
    with open(test_file, "w") as f:
        f.write("")

    y = CsvParser(test_file)
    y.load_file()
    y.parse_file()
    y.process_file()
    assert y.get_value() == 0
Code Example #2
def _get_vehicle_and_passes_from_csv(
        path_to_csv: str) -> typing.Tuple[Vehicle, list]:
    """Get data to be analyzed, will be parsed into a Vehicle object as well as a list of passes
    
    Args:
        path_to_csv (str)
    
    Returns:
        typing.Tuple[Vehicle, list]
    """

    cp = CsvParser()
    licence_plate, passes = cp.parse_csv(path_to_csv)
    return _get_vehicle_from_licence_plate(licence_plate), passes
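A minimal usage sketch for the helper above, assuming it is called from an analysis entry point; the path and variable names below are placeholders for illustration:

# Hypothetical call site; "data/tolls.csv" is a made-up example path
vehicle, passes = _get_vehicle_and_passes_from_csv("data/tolls.csv")
print(vehicle, len(passes))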
Code Example #3
def test_csv_parser_with_no_child_nodes():

    global test_file

    # header row only, no data rows
    with open(test_file, "w") as f:
        f.write("""name,active,value""")

    y = CsvParser(test_file)
    y.load_file()
    y.parse_file()
    y.process_file()
    assert y.get_value() == 0

    # delete the test file as it is no longer required
    os.remove(test_file)
Code Example #4
def test_csv_parser_with_non_csv_values():

    global test_file

    # rows are space-separated instead of comma-separated (not valid CSV)
    with open(test_file, "w") as f:
        f.write("""name,active,value
John true 
Mark true 
Paul false 100
Ben true 150
""")

    y = CsvParser(test_file)
    y.load_file()
    y.parse_file()
    y.process_file()
    assert y.get_value() == 0
Code Example #5
def test_csv_parser_with_missing_values():

    global test_file

    # value column missing for the first two rows
    with open(test_file, "w") as f:
        f.write("""name,active,value
John,true,
Mark,true,
Paul,false,100
Ben,true,150
""")

    y = CsvParser(test_file)
    y.load_file()
    y.parse_file()
    y.process_file()
    assert y.get_value() == 150
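Taken together, the four tests above imply a parser whose get_value() sums the value column of rows whose active column is true, and treats blank files, header-only files, malformed rows, and missing values as contributing zero. A minimal sketch of a class with that behaviour, written purely to illustrate what the tests expect (the real CsvParser under test may be implemented differently):

class CsvParser:
    """Illustrative only: sums the 'value' column for rows where 'active' is true."""

    def __init__(self, path):
        self._path = path
        self._lines = []
        self._rows = []
        self._total = 0

    def load_file(self):
        with open(self._path) as f:
            self._lines = f.read().splitlines()

    def parse_file(self):
        # Skip the header row; keep the remaining comma-separated rows.
        self._rows = [line.split(",") for line in self._lines[1:]]

    def process_file(self):
        for row in self._rows:
            try:
                if row[1].strip() == "true":      # only count active rows
                    self._total += int(row[2])    # the 'value' column
            except (IndexError, ValueError):
                continue                          # malformed row or missing value

    def get_value(self):
        return self._total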
Code Example #6
def run(website_urls,
        outputfolder='',
        export_tabs=False,
        export_reports=False,
        export_bulk_exports=False):

    # Automatically create the correct --headless Screaming Frog commands;
    sf = ScreamingFrogAnalyser(website_urls=website_urls,
                               outputfolder=outputfolder,
                               export_tabs=export_tabs,
                               export_reports=export_reports,
                               export_bulk_exports=export_bulk_exports)

    # Start running the web crawls
    sf.run_crawls()

    parser = CsvParser(outputfolder=outputfolder,
                       file_paths=sf._sf_folders,
                       website_urls=sf._website_urls)

    # Return the analyser object for running tests
    return sf
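A hedged usage sketch of run(); the URL and output folder are placeholders that simply mirror the signature above:

# Hypothetical invocation; adjust the URL and output folder for your environment
analyser = run(["https://example.com"],
               outputfolder="/tmp/sf_output",
               export_tabs=True)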
Code Example #7
from csv_parser import CsvParser

__author__ = 'hellfish90'

if __name__ == '__main__':

    filename = "test_files/test_location.csv"
    parser = CsvParser(filename)
    header = parser.get_data_types()

    data_set = parser.get_set_by_data_and_location(1, 1)

    for item in header:
        print(item, end=' ')

    print()

    for data in data_set[0]:
        print(data['coordinates'], data['data'])

    print("Missed Rows:", len(data_set[1]))

    parser.close_file()
Code Example #8
 def __init__(self):
     self._parser = {'.csv': CsvParser()}
     self.processor_factory = ProcessorFactory()
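The extension-to-parser mapping suggests a simple dispatch step when a file arrives. A sketch of how such a registry might be consulted, written as a method that could sit on the same class as __init__ above; the method name and error handling are inventions for illustration:

import os

def get_parser(self, path):
    # Hypothetical helper: choose a parser by file extension, e.g. '.csv' -> CsvParser()
    _, extension = os.path.splitext(path)
    parser = self._parser.get(extension.lower())
    if parser is None:
        raise ValueError("No parser registered for %r files" % extension)
    return parser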
Code Example #9
def sf_run(website_urls,
           outputfolder='',
           export_tabs=False,
           export_reports=False,
           export_bulk_exports=False,
           push_data_to_bigquery=False,
           create_bigquery_table=False,
           bigquery_table_mapping=BIGQUERY_TABLE_ID_MAPPINGS):

    if OUTPUTFOLDER == '':
        raise ValidationError(
            'Your OUTPUTFOLDER cannot be empty',
            'Please update your outputfolder to a valid value.')

    # Automatically create the correct --headless Screaming Frog commands;
    sf = ScreamingFrogAnalyser(website_urls=website_urls,
                               outputfolder=outputfolder,
                               export_tabs=export_tabs,
                               export_reports=export_reports,
                               export_bulk_exports=export_bulk_exports)

    # 1. Start running + saving the web crawls
    sf.run_crawls()

    parser = CsvParser(outputfolder=outputfolder,
                       file_paths=sf._sf_folders,
                       website_urls=sf._website_urls)

    # 2.1 Data checking: make sure there is data and at least one of the dataframes contains rows:
    if not any(dataframe_checker(parser)):
        print(
            '''Finished crawling and saved the output to your desired folder/folders. It's impossible to save to BigQuery because you have no .csv data.
        Re-run the script with export_tabs, export_reports, or export_bulk_exports if you would like to upload to BigQuery!
        Exiting the program.
        ''')
        # exit() <-- Disabling this whilst running tests.
        return sf

    # 2.2 Data checking - for valid credentials (Google Cloud project ID + service account key):
    if push_data_to_bigquery:
        config_setup_check(
            [GOOGLE_CLOUD_PROJECT_ID, SERVICE_ACCOUNT_KEY_LOCATION])
        # Google Cloud Credentials + BQ Client Initialisation
        credentials = service_account.Credentials.from_service_account_file(
            SERVICE_ACCOUNT_KEY_LOCATION)
        client = bigquery.Client(credentials=credentials,
                                 project=GOOGLE_CLOUD_PROJECT_ID)

    # 2.3 Data checking - compile a list of dataframes that have both rows and columns:
    available_data = dataframe_row_checker(parser)

    # 3.1 Storing The Queryable Data:
    if create_bigquery_table:
        # Automatically generate the BigQuery tables with timestamped names + push the relevant data:
        print(
            "Some function here that will automatically generate Xn BigQuery tables."
        )
        pass
    else:
        # Automatically use the BigQuery Table Mapping
        print(
            "Some function here that will map the name of the BigQuery table_id against the csv_name."
        )
        if not config._bigquery_inputs_validated:
            raise ValidationError(
                "You need to use a custom dictionary to map your concatenated .csv data against BigQuery table ids.",
                '''
            Please update the setup.yaml file with the relevant bigquery_tab_id mappings.'''
            )

        # Match the dictionary mapping against the available_data dictionary and keep only the BigQuery table_ids where there is data.
        # Error checking: the dictionary keys should match the available_data dict keys in length.
        pass

    # Return the analyser object for running tests
    return sf
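The else branch above only describes, in comments, how the configured table mapping should be matched against the data that actually exists. A sketch of that matching step, assuming available_data is a dict keyed by csv name and bigquery_table_mapping maps csv names to table ids (both structures are assumptions, since they are not shown here):

# Illustrative sketch: keep only the table ids whose csv name actually has data.
tables_to_load = {
    csv_name: table_id
    for csv_name, table_id in bigquery_table_mapping.items()
    if csv_name in available_data
}
if len(tables_to_load) != len(available_data):
    raise ValidationError(
        "Some concatenated .csv files have no BigQuery table_id mapping.",
        "Please add the missing csv_name -> table_id entries to setup.yaml.")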
Code Example #10
def test_parser_multiple_files_csvs():
    parser = CsvParser(outputfolder=outputfolder,
                       file_paths=csv_multiple_file_paths,
                       website_urls=website_urls)
    # Multiple tests here:
    check_data_frame(parser)
Code Example #11
def test_parser_multiple_files_no_csvs():
    parser = CsvParser(outputfolder=outputfolder,
                       file_paths=seo_spider_multiple_file_paths,
                       website_urls=website_urls)
    assert len(parser._csv_data_dict.keys()) == 0
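The two tests above lean on a check_data_frame helper and on the parser's _csv_data_dict attribute. A sketch of what such a helper might assert, under the assumption that _csv_data_dict maps csv names to pandas DataFrames (the real helper in the project may check more):

def check_data_frame(parser):
    # Illustrative only: every stored DataFrame should be non-empty and have columns.
    for csv_name, dataframe in parser._csv_data_dict.items():
        assert not dataframe.empty, f"{csv_name} produced an empty DataFrame"
        assert len(dataframe.columns) > 0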
Code Example #12
File: test_csv_parser.py  Project: fluffy-fox/epam
 def setUp(self):
     self.csv_parser = CsvParser('1000 Sales Records.csv')
Code Example #13
File: csv_parser_test.py  Project: ly232/Kaggle
 def setUp(self):
     self._csv_parser = CsvParser('testdata/csv_parser_test.csv', 'col3')
Code Example #14
        # prefer enumerate(x) over range(len(x))
        for product_index, product_name in enumerate(parser.product_names):
            cumulative_sales_per_product[product_index] += sales_per_week_per_product[product_index]

        number_of_records += 1

    sales_report.total_sales_per_product = dict(zip(parser.product_names, cumulative_sales_per_product))
    return number_of_records


if __name__ == '__main__':

    # print("Test")

    # sales_text contains the entire CSV file
    filename = './data/sales.csv'
    with open(filename) as f:
        sales_text = f.read()

    # print(sales_text)
    parser = CsvParser(sales_text)

    sales_report = generate_sales_report(parser)

    print(sales_report.total_sales_per_week_report())
    print(sales_report.week_with_highest_sales_report())

    print(sales_report.total_sales_per_product_report())
    # print(sales_report.total_sales_per_product_report_narrow_format())
    print(sales_report.average_weekly_sales_report())
Code Example #15
File: run.py  Project: vspark/cobbler-csv
def run():
    # Parse Command Line Options

    usage = "usage: %prog [options]"
    Parser = OptionParser(usage=usage)
    Parser.add_option("-f",
                      "--file",
                      dest="csv_file",
                      metavar="STRING",
                      help="CSV File Location")

    Parser.add_option("-c",
                      "--config",
                      dest="config_file",
                      default="/etc/cobbler-csv.conf",
                      metavar="STRING",
                      help="Config file (default: /etc/cobbler-csv.conf)")
    (options, args) = Parser.parse_args()

    if len(sys.argv) == 1:
        Parser.print_help()
        sys.exit(1)

    config = Config(configFile=options.config_file)
    csv = CsvParser(options.csv_file, config)

    for system in csv:
        hostname = config.get_mapping("hostname").format(**system)
        print "Creating new system %s...\n" % hostname

        cobbler_system = CobblerSystem(options.config_file, hostname=hostname)

        interface = {
            'interface': 'eth0',
            'macaddress': config.get_mapping("macaddress").format(**system),
            'ipaddress': config.get_mapping("ipaddress").format(**system),
            'subnet': config.get_mapping("subnet").format(**system),
            'gateway': config.get_mapping("gateway").format(**system),
            'static': config.get_mapping("static").format(**system)
        }

        cobbler_system.set_interface(**interface)

        ns = ", ".join(
            config.get_mapping("name_servers").format(**system).split(" "))
        search = config.get_mapping("name_servers_search").format(**system)

        kernel_opts = {
            'hostname': hostname,
            'ipaddress': interface['ipaddress'],
            'netmask': interface['subnet'],
            'gateway': interface['gateway'],
            'nameserver': ns,
            'search': search
        }

        cobbler_system.set_kernel_opts(**kernel_opts)

        attributes = [k[4:] for k in dir(cobbler_system) if k[0:4] == "set_"]

        for attribute in attributes:
            try:
                value = config.get_mapping(attribute).format(**system)
                getattr(cobbler_system, "set_" + attribute)(value)
                print("Setting %s:\n%s\n" % (attribute, value))
            except Exception:
                continue  # no biggie, not a required param

        cobbler_system.set_ks_meta(**dict([(k.lower().replace(" ", "_"), v)
                                           for k, v in system.items()]))

        cobbler_system.save()
        print("System saved!")

        if config.update_custom_info:
            try:
                post_trigger(options.config_file, hostname=hostname)
                print("System custom info in Satellite updated!")
            except Exception:
                pass

        print("-----------------------------------------------------------")

    if config.cobbler_sync:
        # Sync cobbler
        print("Syncing cobbler...")
        cobbler_system.sync()
        print("Done.")
        print("-----------------------------------------------------------")