def save_job_results(geocoder, job_id):
        """
        Download results for completed jobs and save them to S3.
        """
        logging.info('Saving results for %s to S3' % job_id)
        finished_folder = 'geocode_finished_jobs'
        pending_folder = 'geocode_pending_jobs'

        connection = boto.connect_s3()
        bucket = connection.get_bucket(GEO_BUCKET)
        old_key = bucket.get_key('%s/%s' % (pending_folder, job_id))

        new_name = old_key.get_contents_as_string()
        new_key = Key(bucket)
        new_key.key = '%s/%s' % (finished_folder, new_name)

        results = geocoder.get_job_results(job_id)
        result_string = StringIO.StringIO()
        writer = DictWriter(result_string, fieldnames=results[0].keys())
        writer.writeheader()
        writer.writerows(results)
        result_string.seek(0)

        email_address = old_key.get_metadata('email')
        if email_address:
            new_key.set_metadata('email', email_address)
            send_email_notification(
                email_address, geocoder.get_job_statuses(job_id=job_id), new_name, 'finished')

        new_key.set_contents_from_string(result_string.getvalue())
        new_key.make_public()
        old_key.delete()
def write_csv(output_file, address_dicts):
    geocoded_file = open(output_file, 'wb')
    writer = DictWriter(geocoded_file, fieldnames=address_dicts[0].keys(),
        dialect='excel', lineterminator='\n')
    writer.writeheader()
    writer.writerows(address_dicts)
    geocoded_file.close() 
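
A minimal usage sketch for the write_csv helper above; the address dicts and output filename are invented for illustration:

sample_addresses = [
    {'address': '100 Main St', 'lat': '40.71', 'lng': '-74.00'},
    {'address': '200 Oak Ave', 'lat': '41.88', 'lng': '-87.63'},
]
write_csv('geocoded_output.csv', sample_addresses)
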
    def test_01_importTab(self):
        # First check the tables; if none exist, create them directly
        #client.drop_database(DB_INFO['DB_NAME'])
        SD.importTab()
        SD.copy_table()
        self.assertIn('FACT_ATTR', db.collection_names())
        self.assertIn('FACT_SERVICE', db.collection_names())
        self.assertIn('FACT_ATTR_SET', db.collection_names())
        self.assertIn('FACT_SCENE', db.collection_names())

        # If the tables exist, check the id first: insert if it does not exist, otherwise update
        L = list()
        table = 'FACT_ATTR'
        filepath = os.path.sep.join([os.path.abspath(Const.DATA_IN), table])
        with open(filepath, 'r') as f:
            dReader = DictReader(f)
            L = [i for i in dReader]
        L[-1]['attr_set_id'] = 1
        L[-1]['value'] = 'rampage'
        L.append({'_id': 4, 'name': 'attr4', 'type_id':6, 'value': 'test', 'attr_set_id': 2})
        with open(filepath, 'w') as f:
            titles = L[-1].keys()
            dwriter = DictWriter(f, titles)
            header = dict(zip(titles, titles))
            dwriter.writerow(header)
            dwriter.writerows(L)

        SD.importTab()
        # Also test the query here while we are at it
        match = {'_id': {'$in': [3,4]}}
        rs = list(db[table].find(match))
        self.assertEqual(len(rs), 2)
        self.assertEqual(rs[-2]['attr_set_id'], 1)
        self.assertEqual(rs[-2]['value'], 'rampage')
def convert_to_standard():
    m_reader = DictReader(open('url_data.csv'))
    m_writer = DictWriter(open('converted_data.csv', 'wb'), ['topic', 'url', 'index', 'ratings', 'total'])
    li = []
    index = 1
    for row in m_reader:
        newrow = {'topic':row['Topic'],
                  'url':row['URL'],
                  'index':index}
        ratings = [0, 0, 0, 0, 0]
        total = 0
        if row['Likert Rating - Microsoft']:
            ratings[int(row['Likert Rating - Microsoft']) - 1] += 1
            total += 1
        if row['Likert Rating - EPFL']:
            if int(row['Likert Rating - EPFL']) == 0:
                continue
            ratings[int(row['Likert Rating - EPFL']) - 1] += 1
            total += 1
        newrow['ratings'] = ','.join([str(r) for r in ratings])
        newrow['total'] = total
        index += 1
        li.append(newrow)
        print newrow
#        print row['Topic']+' '+row['Query'] + ' ' + row['URL'] + ' ' + row['Likert Rating - Microsoft'] + ' ' + row['Likert Rating - EPFL']
    m_writer.writerows(li)
Example #5
def run(args):

    opts = parse_args(args)
    opts.prefix = opts.prefix or opts.graph.split('.', 1)[0]

    sheets = {}
    sheet_headers = {}

    try:
        with open(opts.graph) as csvfile:
            for row in reader(csvfile):
                fv = dict(column.split('=', 1) for column in row)
                entity_type = fv.pop('Entity Type')
                headers = fv.keys()
                if entity_type not in sheets:
                    sheets[entity_type] = [fv]
                    sheet_headers[entity_type] = set(headers)
                    continue
                else:
                    sheets[entity_type].append(fv)
                if len(headers) > len(sheet_headers[entity_type]):
                    # union() returns a new set; update() adds the extra headers in place
                    sheet_headers[entity_type].update(headers)

        for entity_type in sheets:
            with open('%s_%s.csv' % (opts.prefix, entity_type), 'wb') as csvfile:
                csv = DictWriter(csvfile, sheet_headers[entity_type])
                csv.writeheader()
                csv.writerows(sheets[entity_type])
    except IOError, e:
        print 'csv2sheets: %s' % e
        exit(-1)
Example #6
def main(infile, outfile):
    with open(infile) as inf, open(outfile, "w") as outf:
        r = DictReader(inf)
        rows = [process(row) for row in r]
        w = DictWriter(outf, fieldnames=rows[0].keys())
        w.writeheader()
        w.writerows(rows)
Example #7
 def write_file(self, event_list, output_dir, idx=None, **kwargs):
     from csv import DictWriter, excel_tab
     # flatten
     cols = []
     for ev in event_list:
         cols += [k for k in ev.keys() if k not in cols]
     # make sure there's a fidl dir to write to
     od = os.path.join(output_dir, 'txt')
     ensure_dir(od)
     # save
     f_name = 'events'
     if idx is not None:
         f_name += '_' + str(idx)
     f_name += '.txt'
     out_path = os.path.join(od, f_name)
     cols.sort()
     with open(out_path, 'wb') as out_file:
         dw = DictWriter(out_file,
                         cols,
                         restval='',
                         extrasaction='raise',
                         dialect=excel_tab)
         dw.writer.writerow(cols)
         dw.writerows(event_list)
     return os.path.abspath(out_path)
def saveStockCsv():
    global dicts
    saveFile = open(csvFile, "w")
    writer = DictWriter(saveFile, dicts[0].keys(), lineterminator='\n')
    writer.writeheader()
    writer.writerows(dicts)
    saveFile.close()
Example #9
def job_result_csv(job_id):
    db_session = db.get_session()
    db_job = db_session.query(PersistentJob).get(job_id)
    if not db_job:
        return json_error('no task exists with id: {0}'.format(job_id))
    celery_task = Job.task.AsyncResult(db_job.result_key)
    if celery_task.ready():
        task_result = celery_task.get()
        
        csv_io = StringIO()
        if task_result:
            # if task_result is not empty find header in first row
            fieldnames = ['user_id'] + sorted(task_result.values()[0].keys())
        else:
            fieldnames = ['user_id']
        writer = DictWriter(csv_io, fieldnames)
        
        task_rows = []
        # fold user_id into dict so we can use DictWriter to escape things
        for user_id, row in task_result.iteritems():
            row['user_id'] = user_id
            task_rows.append(row)
        writer.writeheader()
        writer.writerows(task_rows)
        app.logger.debug('celery task is ready! returning actual result:\n%s', csv_io.getvalue())
        return Response(csv_io.getvalue(), mimetype='text/csv')
    else:
        return json_response(status=celery_task.status)
def create_access_ports_csv(auth, device_list_with_target_vlan, target_vlan):
    access_interfaces_list = []
    for device in device_list_with_target_vlan:
        #print (device)
        accessinterfaces = get_device_access_interfaces(auth.creds, auth.url, devid=device['id'])
        try:
            for interface in accessinterfaces:
                if (interface['pvid'] == str(target_vlan)):
                    #print (interface)
                    imcint = IMCInterface(device['ip'], interface['ifIndex'], auth.creds, auth.url)
                    interface_info = {'sysname': imcint.sysname,
                                      'sysdesc': imcint.sysdescription,
                                      'syslocation': imcint.location,
                                      'intname': imcint.name,
                                      'intdescription': imcint.description,
                                      'status': imcint.status,
                                      'lastchange': imcint.lastchange,
                                      'inttype': 'access',
                                      'pvid': interface['pvid']}
                    #print(interface_info)
                    access_interfaces_list.append(interface_info)
        except Exception:
            pass  # prevents crashing when the device is not present or data doesn't exist in the database
    #print (access_interfaces_list)
    keys = access_interfaces_list[0].keys()
    for i in access_interfaces_list:
        if len(i) != len(access_interfaces_list[0].keys()):
            keys = access_interfaces_list[access_interfaces_list.index(i)].keys()
    with open('access_interfaces_vlan_' + str(target_vlan) + '.csv', 'w', newline='') as csvfile:
        writer = DictWriter(csvfile, fieldnames=keys)
        writer.writeheader()
        writer.writerows(access_interfaces_list)
Example #11
def output_results(poi_result_set, screen=True, outfile=None):
    """
    Outputs unified DBSCAN results to screen or csv file.
    The screen only shows major data elements. The CSV file has the 
    complete dictionary (i.e., base dictionay plus ZOA attributes for each POI)
    """
    assert not isinstance(poi_result_set, basestring), 'POI result set is not list or tuple'

    if screen:
        print "\nZOAs by POI"
        print "="*80,
        for poi in poi_result_set:                
            print "\nLocation:\t%s" % poi[s.NAME_KEY]
            print "Address:\t%s" % poi[s.ADDR_KEY]
            print "Neighborhood:\t%s" % poi[s.NBHD_KEY]
            print "Coordinates:\t%.4f, %.4f" % (poi[s.LAT_KEY], poi[s.LNG_KEY])
            print "ZOA ID:\t\t%d" % poi[s.ZOA_KEY] 
        
    if outfile:
        assert isinstance(outfile, str), "Outfile name is not a string: %r" % outfile
        if outfile[-4:] != '.csv': outfile += '.csv'
        with open(outfile, 'wb') as f:
            target = DictWriter(f, poi_result_set[0].keys())
            target.writeheader()
            target.writerows(poi_result_set)
        print "\nWrote output to %s.\n" % outfile
def test_behavior_strategy(b: Behavior, s: Strategy, size=20):
    TRIALS = 10**2
    results = []
    start = time()
    dynamic = False
    for _ in range(TRIALS):
        r = MemoryManager(s, size, dynamic).handle_string(generate_list(b))
        results.append(r)
    end = time()
    avg_time = (end - start)/TRIALS
    print('Average time: ', avg_time)
    print('Minimum no. page faults: ', min(results))
    print('Maximum no. page faults: ', max(results))
    avg = sum(results)/len(results)
    print('Average no. page faults: ', avg)
    with open('benchmarks.csv', 'r') as record_file:
        data = DictReader(record_file)
        entries = [i for i in data]
    entry_fields = ['Behavior', 'Strategy', 'Res. Set Size', 'Faults']
    new_entry = {'Behavior': b.name, 'Strategy': s.name, 'Res. Set Size': size, 'Faults': int(avg)}
    entries.append(new_entry)
    entries = sorted(entries, key=itemgetter('Behavior', 'Strategy'))
    with open('benchmarks.csv', 'w', newline='') as record_file:
        writer = DictWriter(record_file, entry_fields)
        writer.writeheader()
        writer.writerows(entries)
Example #13
def dump_csv(pages, options):
    """Dump in CSV format.

    ``pages`` is an iterable of (field, value) tuples.

    It's assumed that the same fields are used in each tuple.
    """
    from itertools import chain
    from csv import DictWriter
    from sys import stdout
    pages = iter(pages)
    try:
        first_row = pages.next()
    except StopIteration:
        return
    fields = [item[0] for item in first_row]
    rows = chain((first_row,), pages)
    dicts = (dict(page) for page in rows)
    dicts = (process_page(row) for row in dicts)

    def validate_row_length(row_dict):
        if len(row_dict) != len(fields):
            raise DataValidationError(
                'Inconsistent number of fields in row {0}.\n'
                'Fields: {1}'.format(row_dict, fields))
        return row_dict
    dicts = (validate_row_length(row) for row in dicts)

    writer = DictWriter(stdout, fields, dialect='excel-tab')
    writer.writerow(dict((v, v) for v in fields))
    writer.writerows(dicts)
Example #14
def write_csv(filename, data, header):
    from csv import DictWriter
    with open(filename, 'wb') as fp:
        writer = DictWriter(fp, fieldnames=header, delimiter=';')
        writer.writeheader()
        writer.writerows(data)
    return
    def write_rows(self, entries):
        """Write rows in the csv file"""

        with open(self.output_filename, 'w') as output_file:
            writer = DictWriter(output_file, FIELDNAMES)
            writer.writeheader()
            writer.writerows(entries)
Example #16
def run(out_file, input_dirs):
    assert input_dirs
    files = []
    for input_dir in input_dirs:
        for filename in listdir(input_dir):
            if filename.endswith(".log"):
                files.append(join(input_dir, filename))
    assert files

    raw_data = sorted((get_data(filename) for filename in files), key=data_key)

    aggregated_data = (
        indy_vars + aggregate_data(group) for (_key, indy_vars), group in
        groupby(raw_data, partial(data_key, include_id=False))
    )

    with open(out_file, "w") as f:
        out = DictWriter(f, independent_vars + dependent_vars)
        out.writeheader()
        out.writerows(raw_data)
    del out

    with open("-aggregate".join(splitext(out_file)), "w") as f:
        out = writer(f)
        out.writerow(independent_vars[:-1] + ("count",) + dependent_vars)
        out.writerows(aggregated_data)
def write_output():
    with open(
        "urls-used-for-local-transactions-with-statuses-and-jumbled-urls-and-pageviews.csv", "w", encoding="utf8"
    ) as output:
        writer = DictWriter(output, fields)
        writer.writeheader()
        writer.writerows(urls_used_with_pageviews)
def report_result_csv(result_key):
    celery_task, pj = get_celery_task(result_key)
    if not celery_task:
        return json_error('no task exists with id: {0}'.format(result_key))
    
    if celery_task.ready():
        task_result = get_celery_task_result(celery_task, pj)
        
        csv_io = StringIO()
        if task_result:
            columns = []
            
            if Aggregation.IND in task_result:
                columns = task_result[Aggregation.IND][0].values()[0].keys()
            elif Aggregation.SUM in task_result:
                columns = task_result[Aggregation.SUM].keys()
            elif Aggregation.AVG in task_result:
                columns = task_result[Aggregation.AVG].keys()
            elif Aggregation.STD in task_result:
                columns = task_result[Aggregation.STD].keys()
            
            # if task_result is not empty find header in first row
            fieldnames = ['user_id'] + columns
        else:
            fieldnames = ['user_id']
        writer = DictWriter(csv_io, fieldnames)
        
        # collect rows to output in CSV
        task_rows = []
        
        # Individual Results
        if Aggregation.IND in task_result:
            # fold user_id into dict so we can use DictWriter to escape things
            for user_id, row in task_result[Aggregation.IND][0].iteritems():
                task_row = row.copy()
                task_row['user_id'] = user_id
                task_rows.append(task_row)
        
        # Aggregate Results
        if Aggregation.SUM in task_result:
            task_row = task_result[Aggregation.SUM].copy()
            task_row['user_id'] = Aggregation.SUM
            task_rows.append(task_row)
        
        if Aggregation.AVG in task_result:
            task_row = task_result[Aggregation.AVG].copy()
            task_row['user_id'] = Aggregation.AVG
            task_rows.append(task_row)
        
        if Aggregation.STD in task_result:
            task_row = task_result[Aggregation.STD].copy()
            task_row['user_id'] = Aggregation.STD
            task_rows.append(task_row)
        
        writer.writeheader()
        writer.writerows(task_rows)
        return Response(csv_io.getvalue(), mimetype='text/csv')
    else:
        return json_response(status=celery_task.status)
def get_csv_object(data_rows, schema, include_header=False):
    fieldnames = [x['name'] for x in schema]
    file_obj = StringIO()
    writer = DictWriter(file_obj, fieldnames=fieldnames)
    if include_header:
        writer.writeheader()
    writer.writerows(data_rows)
    return file_obj
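
A quick sketch of how get_csv_object might be called; the schema and rows below are invented for illustration:

sample_schema = [{'name': 'id'}, {'name': 'value'}]
sample_rows = [{'id': 1, 'value': 'a'}, {'id': 2, 'value': 'b'}]
csv_obj = get_csv_object(sample_rows, sample_schema, include_header=True)
print(csv_obj.getvalue())
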
def main(start_spd, end_spd, steps, direction):
    """
    Performs a motor deadband test, prints the result, and saves the data to a csv.
    """
    read_frq = 300  # Frequency of reads
    settle_period = 2 # Specify time (s) to let motor settle before assessing deadband
    moving_threshold = 10  # Number of ticks/second to consider the motor to have left the deadband

    comms = PIC_USB(0x0005)  
    read_period = 1/float(read_frq)

    motor_speed = start_spd
    motor_dir = direction
    data = []
    prev_read_quad_time = time.clock()
    prev_quad_counter = comms.get_quad_info()["counter"]
    speeds = range(start_spd, end_spd, steps)
    for spd in speeds:
        set_start = time.clock()
        print("\tSetting motor speed to {}.".format(spd))
        comms.command_dcmotor(spd, motor_dir)
        motor_speed = spd

        while time.clock() - set_start < settle_period:
            this_read = {}
            read_start = time.clock()
            quad_info = comms.get_quad_info()
            this_read["time"] = read_start
            this_read["motor_speed_cmd"] = motor_speed
            this_read["motor_dir_cmd"] = motor_dir
            this_read["quad_counter"] = quad_info["counter"]
            this_read["quad_overflow"] = quad_info["overflow"]
            this_read["quad_calc_speed"] = abs(quad_info["counter"] - prev_quad_counter) / (read_start - prev_read_quad_time)
            data.append(this_read)
            prev_read_quad_time = read_start
            prev_quad_counter = quad_info["counter"]
            while time.clock() - read_start < read_period:
                pass
        # After motor has "settled"
        settle_check_time = time.clock()
        quad_info = comms.get_quad_info()
        settle_vel = abs(quad_info["counter"] - prev_quad_counter) / (settle_check_time - prev_read_quad_time)      
        if settle_vel > moving_threshold:
            print("\tDeadband end identified at speed {}.".format(spd))
            break

    print("\tTest Concluded. Writing data...")

    filename = "deadband_{0}-{1}_steps{2}_{3}".format(start_spd, end_spd, steps, direction)
    headers = ["time", "motor_speed_cmd", "motor_dir_cmd", "quad_counter", "quad_overflow", "quad_calc_speed"]

    with open("data/{}.csv".format(filename), 'wb') as out_data:
        writer = DictWriter(out_data, fieldnames=headers)
        writer.writeheader()
        writer.writerows(data)

    print("\tData saved to \"data/{}.csv\"".format(filename))
    print("ENDING DEADBAND TEST")
Example #21
    def toCSV(self, fields, data):
        dialect = csv.excel()
        dialect.delimiter = self.getDelimiter()
        buffer = StringIO()
        writer = DictWriter(buffer, fieldnames=fields, dialect=dialect)
        if self.getShowHeader():
            writer.writerow(dict(zip(fields, fields)))
        writer.writerows(data)
        return buffer.getvalue()
def add_snapshot():
    m_reader = DictReader(open('alchemy_data.csv'))
    li = []
    for row in m_reader:
        newrow = row
        newrow['snapshot'] = '/static/snapshots/l/url'+row['index']+'.jpg'
        li.append(newrow)
    m_writer = DictWriter(open('data1.csv', 'wb'), ['topic', 'url', 'index', 'ratings', 'total', 'snapshot'])
    m_writer.writerows(li)
Example #23
def writeToFile():
    temp = sorted(content, key=itemgetter('trackNumber'))
    temp = sorted(temp, key=itemgetter('album'))
    temp = sorted(temp, key=itemgetter('artist'))
    with open('musiclibrary.csv','w') as outfile:
        # outfile.write(u'\ufeff'.encode('utf-8'))
        writer = DictWriter(outfile, ('artist','album','trackNumber','name'))
        writer.writeheader()
        writer.writerows(temp)
    def writelog(self, f):
        '''Write log to f - f can be a filename or a file opened for writing'''
        header, log = self.loglines()
        try:
            dw = DictWriter(f, header)
        except TypeError:
            dw = DictWriter(open(f, 'w'), header)

        dw.writer.writerow(header)
        dw.writerows(log)
def main(start_vel, end_vel):
    """
    Performs a step response test and saves the data to a csv.
    """
    test_len = 10  # Length of test in seconds.
    read_frq = 100  # Frequency of reads
    step_time = 1  # Time (in sec) at which to perform step.

    comms = PIC_USB(0x0005)  
    read_period = 1/float(read_frq)

    STEPPED = False
    motor_speed = start_vel[0]
    motor_dir = start_vel[1]
    print("STARTING STEP RESPONSE TEST")
    data = []
    print("\tStarting at speed {}, dir {}".format(motor_speed, motor_dir))
    comms.command_dcmotor(motor_speed, motor_dir)
    test_start_time = time.clock()
    prev_read_quad_time = test_start_time
    prev_quad_counter = comms.get_quad_info()["counter"]
    while time.clock() - test_start_time < test_len:
        read_start = time.clock()
        this_read = {}
        if read_start > step_time and not STEPPED:
            motor_speed = end_vel[0]
            motor_dir = end_vel[1]
            print("\tStepping to speed {}, dir {}".format(motor_speed, motor_dir))
            comms.command_dcmotor(motor_speed, motor_dir)
            STEPPED = True
        read_quad_time = time.clock()
        quad_info = comms.get_quad_info()
        this_read["time"] = read_quad_time
        this_read["motor_speed_cmd"] = motor_speed
        this_read["motor_dir_cmd"] = motor_dir
        this_read["quad_counter"] = quad_info["counter"]
        this_read["quad_overflow"] = quad_info["overflow"]
        this_read["quad_calc_speed"] = abs(quad_info["counter"] - prev_quad_counter) / (read_quad_time - prev_read_quad_time)
        data.append(this_read)
        prev_read_quad_time = read_quad_time
        prev_quad_counter = quad_info["counter"]
        while time.clock() - read_start < read_period:
            pass
    print("\tTest Concluded. Writing data...")

    filename = "step_response_{0[0]}-{0[1]}_to_{1[0]}-{1[1]}".format(start_vel, end_vel)
    headers = ["time", "motor_speed_cmd", "motor_dir_cmd", "quad_counter", "quad_overflow", "quad_calc_speed"]

    with open("data/{}.csv".format(filename), 'wb') as out_data:
        writer = DictWriter(out_data, fieldnames=headers)
        writer.writeheader()
        writer.writerows(data)

    print("\tData saved to \"data/{}.csv\"".format(filename))
    print("ENDING STEP RESPONSE TEST")
Example #26
def print_results(results):
    dict_results = [r.to_dict() for r in results]
    fieldnames = ['concurrency', 'total_mean', 'total_median', 'total_max', 'percentage_failed']

    f = StringIO()
    writer = DictWriter(f, fieldnames, restval='', extrasaction='ignore')
    writer.writeheader()
    writer.writerows(dict_results)

    f.seek(0)
    print '\n'
    print f.read()
def list_of_dicts_to_csv(table_id, list_of_dicts):
    csv = StringIO.StringIO()
    cols = table_cols(table_id)
    logging.debug("Creating CSV using cols %s" % ','.join(cols))
    w = DictWriter(csv, cols)
    for dict in list_of_dicts:
        for key, value in dict.iteritems():
            if isinstance(value, unicode):
                dict[key] = value.encode('utf8')
    w.writerows(list_of_dicts)
    logging.debug("Created CSV %s" % csv.getvalue())
    return csv
Example #28
 def _convert_csv(self, notes):
     if self.stdout:
         simple_file = StringIO()
     else:
         simple_file = open(self.simple_filename, 'w')
     writer = DictWriter(simple_file, self.fieldnames)
     writer.writerows(notes)
     if self.stdout:
         simple_file.seek(0)
         # XXX: this is only for the StringIO right now
         sys.stdout.write(simple_file.getvalue())
     simple_file.close()
def write_out(stats_list):
    """
        writes stats to CSV
        :param stats_list: list of dicts
        :return:
        """
    with open(output_filename, "a") as output_file:
        writer = DictWriter(
            output_file, fieldnames=output_fieldnames, delimiter=",", extrasaction="ignore", lineterminator="\n"
        )
        writer.writerows(stats_list)
    print("Wrote {} lines to {}".format(len(stats_list), output_filename))
Example #30
def export_data(fn, messages):

    log.name("exporter").debug("Exporting data")

    ## fieldnames
    keys_ = SMS_PROPERTIES + TEXT_PROPERTIES + SINGLE_ENTITIES_ELEM + COMPLEX_ENTITIES_ELEM + ["tvb"] + EMOTICONS.keys()

    writer = DictWriter(open(fn, "w"), fieldnames=keys_, delimiter=",", quotechar='"', quoting=QUOTE_ALL)

    writer.writeheader()
    log.name("exporter").debug("Preparing and saving data")
    writer.writerows([prepare_message_data(m) for m in messages])
    log.name("exporter").debug("Data exported and saved into {file_name}", file_name=fn)
Example #31
    def rewrite_roomers(self):
        if len(self.roomers):
            names = []
            people = []
            for i in self.roomers:
                people.append(i.nnn())
            for i in people[0]:
                names.append(i)
            with open('.\\data\\roomers.csv', 'w') as file:
                writer = DictWriter(file, fieldnames=names, delimiter=';')
                writer.writeheader()
                writer.writerows(people)

        else:
            with open('.\\data\\roomers.csv', 'w') as file:
                pass
Example #32
 def to_csv(self):
     """ Write the CSVOutput to a csv file """
     if self.data_type == 'pandas':
         self.records.to_csv(self.output)
     elif self.data_type == 'numpy':
         np.savetxt(self.output, self.records, delimiter=",")
     elif isinstance(self.records[0], dict):
         with open(self.output, 'w') as output:
             wrtr = DictWriter(output, fieldnames=self.records[0].keys())
             wrtr.writeheader()
             wrtr.writerows(self.records)
     else:
         with open(self.output, 'w') as output:
             wrtr = writer(output)
             wrtr.writerows(self.records)
     return self.output
Example #33
    def handle(self, *args, **options):
        if len(args) == 0:
            raise CommandError('AMI catalog filename is not specified.')

        with open(args[0], 'w') as csvfile:
            writer = DictWriter(csvfile,
                                fieldnames=('name', 'region', 'backend_id'))
            writer.writeheader()
            rows = [
                dict(
                    name=image.name,
                    region=image.region.name,
                    backend_id=image.backend_id,
                ) for image in models.Image.objects.all()
            ]
            writer.writerows(rows)
Example #34
def main():
    train_iter = next_row(read_tsv("data/stream_%s.tsv"%args.sz))
    test_iter = iter([])
    sinfo_iter = read_tsv("data/sinfo_%s.tsv"%args.sz)
    del_keys_set = ["HistCTR", "SearchID", "ObjectType"]

    for t, (data_type, rows, sinfo) in enumerate(data(train_iter=train_iter, test_iter=test_iter, sinfo_iter=sinfo_iter)):
        uid = int(sinfo["UserID"])
        date_str = sinfo["SearchDate"]
        ts = convert_ts(datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S.0"))

        rows = filter(lambda x : int(x["ObjectType"]) == 3, rows)
        for row in rows:
            for key in del_keys_set:
                del row[key]
            for key in row:
                row[key] = int(row[key]) if row[key] != "" else 0
        item = (
                ts,
                int(sinfo["SearchID"]),
                tuple([(row["AdID"], row["IsClick"], row["Position"]) for row in rows]),
                )
        uid_sid[uid].append(item)

    print "uid_sid: %s"%len(uid_sid)
    for uid in uid_sid:
        uid_sid[uid].sort()

    print "start user_cnt."
    file_name = "data/user_cnt_%s.csv"%args.sz
    with open(file_name, "w") as f:
        writer = DictWriter(f, fieldnames=["SearchID", "t_cnt", "bf_cnt", "af_cnt", "bf_3h_cnt", "af_3h_cnt", "bf_clk_cnt", "bag2", "bag1"])
        writer.writeheader()
        for uid in uid_sid:
            all_se = uid_sid[uid]
            writer.writerows(get_rows(all_se))
    os.system('sort -t"," -k1 -g -S 2G %s -o %s_sorted'%(file_name, file_name))

    print "start user_aid_cnt."
    file_name = "data/user_aid_cnt_%s.csv"%args.sz
    with open(file_name, "w") as f:
        writer = DictWriter(f, fieldnames=["SearchID", "AdID", "clk_cnt", "show_cnt", "t_show_cnt", "pos_clk_cnt", "pos_show_cnt"])
        writer.writeheader()
        for uid in uid_sid:
            all_se = uid_sid[uid]
            writer.writerows(get_aid_rows(uid, all_se))
    os.system('sort -t"," -k1 -g -S 2G %s -o %s_sorted'%(file_name, file_name))
Example #35
class CsvDumper:
    """This class provides convenience to CSV dumping via Context Manager."""
    def __init__(self, filename, fieldnames, logger=None):

        #: filename for the output file where the data will be dumped
        self.filename = filename

        #: list of fieldnames that the CSV file will follow
        self.fieldnames = fieldnames

        #: file handle for writing to the output
        self.open_file = open(self.filename, 'a')

        #: :class:`DictWriter <DictWriter>` to handle writing dicts
        self.dict_writer = DictWriter(self.open_file,
                                      self.fieldnames,
                                      dialect='unix',
                                      quoting=QUOTE_NONNUMERIC)

        #: :class:`Logger <Logger>`
        self.logger = logger or logging.getLogger(__name__)

    def __enter__(self):
        self._write_csv_headers()
        return self

    def __exit__(self, *args):
        self.open_file.close()

    def _write_csv_headers(self):
        """Writes the CSV data headers into the file if it's still empty."""

        if self.open_file.tell() == 0:
            self.dict_writer.writeheader()

    def dump(self, list_of_dicts):
        """Dumps the list of dicts into the CSV file for this instance.

        :param list_of_dicts: As it says.
        """

        if len(list_of_dicts) == 0:
            return

        self.dict_writer.writerows(list_of_dicts)

        self.logger.info("Data collected and dumped: {}".format(self.filename))
Example #36
 def export_to_csv(self, sql):
     output = StringIO()
     cursor = self.get_cursor()
     with self.log_query(sql):
         cursor.execute(sql)
         data = cursor.fetchall()
     writer = DictWriter(
         output,
         fieldnames=[column[0] for column in cursor.description],
         lineterminator="\n",
     )
     writer.writeheader()
     writer.writerows(data)
     result = output.getvalue().encode()
     output.close()  # StringIO doesn't support context manager protocol on Python 2
     return result
Example #37
    def export_to_CSV(self):
        """!
        Exports the generated contact list to a CSV file.
        """

        try:
            # print("Attempting to save to", self.file_name+".csv")
            with open(self.file_name+".csv", 'w', encoding='utf8', newline='') as output_file:
                fc = DictWriter(output_file, fieldnames=self.contact_list[0].keys(),)
                fc.writeheader()
                fc.writerows(self.contact_list)
        except IOError:
            messagebox.showerror("Error", "Unable to access "+ self.file_name + ".csv. Please close the file if it is open.")
            print("Unable to access", self.file_name+".csv.", "Please close the file if it is open.")
            return 1
        print("Contact list has been saved.")
        return 0
Example #38
 def bulkload_lod(self, lod, keys, tablename, tmp_prefix='/tmp/'):
     filename = tmp_prefix + datetime.now().strftime("%s") + '.csv'
     register_dialect('own',
                      'excel',
                      delimiter=',',
                      lineterminator='\n',
                      quoting=QUOTE_NONNUMERIC)
     with open(filename, 'w', encoding='utf-8') as f:
         wr = DictWriter(f, keys, dialect='own')
         wr.writerows(lod)
     with open(filename, 'rt', encoding='utf-8') as f:
         lines = f.readlines()
         sublines = [sub('""', '', i) for i in lines]
     with open(filename, 'w', encoding='utf-8') as f:
         f.writelines(sublines)
     self.bulkload(tablename, filename)
     return filename
Example #39
def assign_ids(f):
    """Function that does the bulk of the processing. Definitely too long and
    needs to be split out to smaller functions, oh well. Outputs the
    matched data to a staging folder

    Keyword Arguments:
        f -- name of the file to process
    """
    test_file_path = os.path.join(Dirs.TEST_DIR, f)
    staging_file_path = os.path.join(Dirs.STAGING_DIR, f)

    with open(test_file_path, 'r', encoding='utf-16') as r, \
         open(staging_file_path, 'w', encoding='utf-16') as w:
        reader = DictReader(r, dialect='excel-tab')
        rows = list(reader)
        fields = reader.fieldnames
        # ocdid_report is sometimes not included, and additional fields are
        # occasionally added.
        if 'ocdid_report' not in fields:
            fields.append('ocdid_report')
        writer = DictWriter(w, fieldnames=fields, dialect='excel-tab')
        writer.writeheader()

        ocdid_vals = {}
        unmatched = {}
        matched = []

        for row in list(rows):
            row['OCDID'] = row['OCDID'].lower()
            ocdid = row['OCDID']
            if ocdid == '':
                message = '{} / {} ({}) has no OCDID.'
                print(message.format(row['Person UUID'],
                                     row['Electoral District'],
                                     row['State']))
                rows.remove(row)
                # skip rows without an OCDID so they are not written to the output
                continue

            matched.append(row)

        matched.sort(key=lambda x: x['Person UUID'])
        try:
            writer.writerows(matched)
        except ValueError:
            print(matched[0])
            print([match for match in matched if None in list(match)][0])
            raise
Example #40
def search_twitter(keyword, max_pages, twitter_config, csv_directory, **kwargs):
    """Searches Twitter for tweets with the passsed in keyword and stores the
    the result in the data folder locally as a csv file.
    
    Arguments:
        keyword {string} -- keyword to be used to search for relevant tweets
    """
    config = ConfigParser()
    config.read(twitter_config)
    auth = OAuthHandler(config.get('twitter', 'consumer_key'),
                        config.get('twitter', 'consumer_secret'))
    auth.set_access_token(config.get('twitter', 'access_token'),
                          config.get('twitter', 'access_token_secret'))
    api = API(auth)

    all_tweets = []
    
    page_num = 0

    # use the {{ ds }} = execution date variable passed in as part of context provided
    # by Airflow through Jinja macros
    execution_date = kwargs.get('ds')
    since_date = datetime.strptime(execution_date, '%Y-%m-%d').date() - timedelta(hours=1)
    # restrict the search to the execution window by appending to the search keyword
    keyword += ' since:{} until:{}'.format(since_date.strftime('%Y-%m-%d'), kwargs.get('ds'))

    print('Searching Twitter with: %s' % keyword)

    for page in Cursor(api.search, q=keyword, monitor_rate_limit=True, wait_on_rate_limit=True).pages():
        all_tweets.extend([extract_tweet_data(t, keyword, ) for t in page])
        page_num += 1
        if page_num > max_pages:
            break
    
    # If it is an empty result, stop here
    if not len(all_tweets):
        return

    if not os.path.exists(csv_directory):
        os.makedirs(csv_directory)
    
    filename = '{}/{}_{}.csv'.format(csv_directory, keyword, datetime.now().strftime('%m%d%Y%H%M%S'))

    with open(filename, 'w') as raw_file:
        raw_writer = DictWriter(raw_file, fieldnames=all_tweets[0].keys())
        raw_writer.writeheader()
        raw_writer.writerows(all_tweets)
    def scrapAdress(self, response):

        # Retrieve the addresses

        if self.a:
            nbadressesbypage = self.a
        else:
            nbadressesbypage = 2

        service = response.meta['service']
        page = response.meta['page']

        #for adresse in np.array(response.xpath("//td/a/@href").re(r'[13][a-km-zA-HJ-NP-Z1-9]{25,34}')[0:self.nbAdressByPage]):
        for adresse in np.array(
                response.xpath("//td/a/@href").re(
                    r'[13][a-km-zA-HJ-NP-Z1-9]{25,34}')
            [0:int(nbadressesbypage)]):
            yield {
                'service': service,
                'adresse': adresse,
                'page': page
            }
            self.adressDictionnary['adresses'].append({
                'service': service,
                adresse: service,
                'page': page
            })
            self.adressTab.append({
                'service': service,
                'adresse': adresse,
                'page': page
            })

        jsonObject = json.dumps(self.adressDictionnary)
        f = open("adresses.json", "w")
        f.write(jsonObject)
        f.close()

        # self.adressTab already holds one flat dict per scraped address,
        # so it can be written to CSV directly
        the_file = open("sample.csv", "w")
        writer = DictWriter(the_file, self.adressTab[0].keys())
        writer.writeheader()
        writer.writerows(self.adressTab)
        the_file.close()
Example #42
def export_sections_to_csv(filename_or_object, sections):
    if isinstance(filename_or_object, str):
        f = open(filename_or_object, 'w', newline='')
        opened = True
    else:
        f = filename_or_object
        opened = False

    try:
        writer = DictWriter(f, ['section'] + FIELDS)
        writer.writeheader()
        writer.writerows(chain.from_iterable(map(_section2rows, sections)))
    finally:
        if opened:
            f.close()
Example #43
def save_delivery(delivery, available):
    deliveries = get_deliveries()
    if deliveries:
        for index, delivery_ in enumerate(deliveries):
            if delivery == delivery_.get('delivery'):
                deliveries[index]['available'] = available
                break
        with open(join(app.root_path, "data", "delivery.csv"), "w",
                  newline="") as csv_file:

            writer = DictWriter(csv_file, ['delivery', 'available'])
            writer.writeheader()
            writer.writerows(deliveries)

        return True
    else:
        return False
Example #44
def main():
    parser = OptionParser(usage='%prog [options] infile targetname')

    parser.add_option('-f', '--force', action='store_true', default=False, help='overwrite files in current target directory, if exist') 
    options, args = parser.parse_args() 

    if len(args) != 2: parser.error('wrong number of positional arguments')

    infile = args[0]
    targetname = args[1]

    if not(exists(infile)):
        print >>sys.stderr, 'File %s not found.' % infile 
        sys.exit(1)

    if not(exists(targetname)) or not(isdir(targetname)):
        print >>sys.stderr, 'Target directory %s not found.' % targetname 
        sys.exit(1)


    # reading from file 
    instream = open(infile) 
    infieldnames = ['filename', 'lineno', 'score']
    reader = DictReader(instream, fieldnames=infieldnames)
    entries = list(reader)
    instream.close()

    # sorting by filename
    ftable = {}
    for e in entries:
        insertEntry(ftable, e) 
    
    # putting this information into separate files in directory tree
    outfieldnames = ['lineno', 'score'] 
    for k in ftable.keys():
        outfile = join(targetname, ".".join([k, 'scores', 'raw']))

        if exists(outfile) and not(options.force):
            print >>sys.stderr, 'File %s already exists.' % outfile
            print >>sys.stderr, 'Use --force to overwrite.' 
            sys.exit(1)

        outstream = open(outfile, 'w')
        writer = DictWriter(outstream, outfieldnames, extrasaction='ignore', lineterminator='\n') 
        writer.writerows(ftable[k])
        outstream.close()
Example #45
def write_derived_products(papers,
                           base_folder=BASE_FOLDER,
                           issn=ISSN,
                           ignore_book_reviews=False):
    if issn is None or issn == "":
        raise ValueError("ISSN is not defined - can't write out files")

    OUTPUT_FILE = os.path.join(
        base_folder, "{}.csv".format(issn)
    )  # dumps out a CSV with the ISSN as its name in the same directory
    TITLE_FREQUENCY_FILE = os.path.join(
        base_folder, "{}_title_frequency.csv".format(issn)
    )  # dumps out a CSV with the ISSN as its name in the same directory
    AUTHOR_FREQUENCY_FILE = os.path.join(
        base_folder, "{}_author_frequency.csv".format(issn)
    )  # dumps out a CSV with the ISSN as its name in the same directory
    INSTITUTION_FREQUENCY_FILE = os.path.join(
        base_folder, "{}_insitution_frequency.csv".format(issn)
    )  # dumps out a CSV with the ISSN as its name in the same directory

    title_frequency_info = frequency_titles(
        papers, ignore_book_reviews=ignore_book_reviews)
    author_frequency_info = frequency_authors(papers)
    institution_frequency_info = frequency_institutions(
        papers, ignore_book_reviews=ignore_book_reviews)

    print("Writing Paper Info")
    with open(OUTPUT_FILE, 'wb') as output_file_handle:
        csv_writer = DictWriter(output_file_handle, fieldnames=KEYS_TO_KEEP)
        csv_writer.writeheader()
        csv_writer.writerows(papers)

    write_frequencies([{
        "name": "Title",
        "path": TITLE_FREQUENCY_FILE,
        "data": title_frequency_info
    }, {
        "name": "Author",
        "path": AUTHOR_FREQUENCY_FILE,
        "data": author_frequency_info
    }, {
        "name": "Institution",
        "path": INSTITUTION_FREQUENCY_FILE,
        "data": institution_frequency_info
    }])
Example #46
def write_input_spreadsheet(data_units, outfile):
    # Merge all the keys to prepare the CSV headers
    headers = set([k for d in data_units for k in d.keys()])
    # Specific field for test (gold) units
    headers.add('_golden')
    headers = list(headers)
    gold_columns = []
    for header in headers:
        # Add gold answer columns for each chunk
        if re.search('chunk_[0-9]{2}$', header):
            gold_columns.append(header + '_gold')
    headers += gold_columns
    headers.sort()
    logger.debug('CSV headers: %s' % headers)
    writer = DictWriter(outfile, headers)
    writer.writeheader()
    writer.writerows(data_units)
    return 0
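
An illustrative call for write_input_spreadsheet above; the chunk field names follow the 'chunk_NN' pattern the function expects, but the values and filename are made up:

example_units = [
    {'chunk_01': 'first sentence', 'chunk_02': 'second sentence'},
    {'chunk_01': 'another sentence'},
]
with open('input_units.csv', 'w', newline='') as out:
    write_input_spreadsheet(example_units, out)
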
def main():
    """The main function."""
    args = parse_args()

    identifiers = set()

    for path in args.json:
        with open(path, 'r') as inf:
            for ast in parse_ast(inf):
                identifiers |= walk_ast_rec(ast)

    identifiers = [dict(identifier=iden, type=iden_type)
                   for iden, iden_type in identifiers]

    csv_writer = CsvDictWriter(sys.stdout,
                               fieldnames=('identifier', 'type'))
    csv_writer.writeheader()
    csv_writer.writerows(identifiers)
Example #48
def main() -> None:
    session = HTMLSession()
    login(session, input("username: "), input("password: "))
    inline_set(session, "fs", "f")  # sort by favorites
    inline_set(session, "dm", "m")  # minimal display mode
    pagecount = scrape_pagecount(session)
    with open(input("output file: "), mode="w") as ofile:
        writer = DictWriter(ofile,
                            fieldnames=CSV_FIELDNAMES,
                            dialect=CSV_DIALECT)
        writer.writeheader()
        for page in range(pagecount)[::-1]:
            favorites = scrape_favorites(session, page)[::-1]
            writer.writerows(map(lambda fav: fav._asdict(), favorites))
            print(f"[page {page}] scraped {len(favorites)} favorites.")
            sleep(SECONDS_PER_PAGE)
    print("all exported. >:3c")
    def _check_point_to_csv(self, send_segment, now):
        # Writes files to the "input" directory so the pusher will find them,
        # archive them, and send them out.

        # The input directory may not have been created yet
        create_dirs(self.input_dir)

        segment_data = self.log_node.parsed_data.pop(send_segment, [])
        if not segment_data:
            return

        file_name = '{}_{}.csv.gz'.format(send_segment.strftime(self.file_fmt),
                                          now.strftime(self.file_fmt))
        file_path = join(self.input_dir, file_name)
        with gz_open(file_path, 'wt') as outfile:
            writer = DictWriter(outfile, CSV_HEADER)
            writer.writeheader()
            writer.writerows(self._format_item(x) for x in segment_data)
Example #50
def write_csv_file(interval_data, filename):
    """Writes the period configuration structure into a csv file

    Parameters
    ----------
    interval_data: list
        A list of period dicts
    filename: str
        The name of the file to produce
    """
    with open(filename, 'w+', newline='') as csvfile:
        headers = ['name', 'interval']
        writer = DictWriter(csvfile, headers)
        writer.writeheader()
        writer.writerows({
            'name': i['name'],
            'interval': json.dumps(i['interval'])
        } for i in interval_data)
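
A small illustrative call for write_csv_file; the interval structure is assumed from the docstring and the values are made up:

example_intervals = [
    {'name': '1', 'interval': [['PT0H', 'PT1H']]},
    {'name': '2', 'interval': [['PT1H', 'PT2H']]},
]
write_csv_file(example_intervals, 'intervals.csv')
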
Example #51
def save_library():
    temp = sorted(content, key=itemgetter('trackNumber'))
    temp = sorted(temp, key=itemgetter('album'))
    temp = sorted(temp, key=itemgetter('artist'))

    # library_filename = raw_input('save file as (do not include .csv or any extension!)\n: ')
    try:
        with open(library_filename + '.csv', 'w') as outfile:
            print("exporting library to CSV format"),
            writer = DictWriter(
                outfile,
                ('artist', 'album', 'trackNumber', 'name', 'playCount'))
            writer.writeheader()
            writer.writerows(temp)
            print('\'' + str(library_filename) + str('.csv') +
                  '\' saved in current directory')
    except IOError:
        sys.exit("invalid filename!")
def append_csv(csv_filename, data):
    with open(csv_filename, 'a+') as csv_file:
        reader = DictReader(csv_file)
        new_keys = set(data.keys()).difference(reader.fieldnames)
        if not new_keys:
            csv_file.seek(0, os.SEEK_END)
            writer = DictWriter(csv_file, reader.fieldnames)
            writer.writerow(data)
        else:
            reader.fieldnames.extend(sorted(new_keys))
            with NamedTemporaryFile(dir='.', delete=False) as csv_tmpfile:
                writer = DictWriter(csv_tmpfile,
                                    reader.fieldnames,
                                    lineterminator='\n')
                writer.writeheader()
                writer.writerows(row for row in reader)
                writer.writerow(data)
            shutil.move(csv_tmpfile.name, csv_filename)
 def save_receipts(self, dest_dir, csv_filename):
     report_data = list()
     for e in self.commute_emails:
         trip_time = e.get_trip_time()
         receipt_filename = "{trip_date}_{trip_time}hrs_{provider}".format(
             trip_date=trip_time.strftime('%Y%m%d'), trip_time=trip_time.time().strftime('%H%M'),
             provider=e.get_provider())
         e.save_receipt(file_name=receipt_filename, save_path=dest_dir)
         src, dest = e.get_source_dest_address()
         invoice_line = dict(trip_date=trip_time.strftime('%Y%m%d'),
                             trip_time="%shrs" % trip_time.time().strftime('%H%M'),
                             start_addr=src, end_addr=dest, fare=e.get_fare())
         report_data.append(invoice_line)
     with open(os.path.join(dest_dir, csv_filename), 'w') as csvfile:
         fieldnames = report_data[0].keys()
         writer = DictWriter(csvfile, fieldnames=fieldnames)
         writer.writeheader()
         writer.writerows(report_data)
Example #54
    def delete(self, phone_number):
        counterl = 0
        print(phone_number)
        for index, value in enumerate(self.phone):
            print(index, value)
            if value["PhoneNumber"] == phone_number:
                print(index)
                self.phone.pop(index)
                counterl += 1

        if counterl == 0:
            print("Entered phone number does not exists.")
        else:
            with open(filename, "w", newline="") as handler:
                writer = DictWriter(handler, fieldnames=["Name", "PhoneNumber", "Email Address"])
                writer.writeheader()
                writer.writerows(self.phone)
            print("Phone Number deleted successfully")
Example #55
    def write_csv_result(self):
        out_path = self.out_dir / self.report_file
        new_file = not out_path.exists()

        data_to_store = []
        for value in self.storage.values():
            if isinstance(value, list):
                data_to_store.extend(value)
            else:
                data_to_store.append(value)

        with open(str(out_path), 'a+', newline='') as f:
            writer = DictWriter(f, fieldnames=self.fields)
            if new_file:
                writer.writeheader()
            writer.writerows(data_to_store)

        self._reset_storage()
Example #56
def to_csv(list_of_dicts: List[dict], file_path: str):
    """
    Write the given list of dicts to a csv at the given file path.

    The dictionaries should have a uniform structure, i.e. they should be parsable
    into the rows of the csv, with the keys equivalent to column names.

    :param list_of_dicts: the list to write to a file
    :type list_of_dicts: list
    :param file_path: the file path at which to create the file
    :type file_path: str
    """

    keys = list_of_dicts[0].keys()
    with open(file_path, 'w', encoding='utf-8') as output_file:
        dict_writer = DictWriter(output_file, keys, lineterminator='\n')
        dict_writer.writeheader()
        dict_writer.writerows(list_of_dicts)
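
A short usage sketch for the to_csv helper above; the records and path are illustrative:

records = [
    {'id': 1, 'label': 'first'},
    {'id': 2, 'label': 'second'},
]
to_csv(records, 'records.csv')
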
Example #57
 def get_product_details(self, response):
     for i in range(1000):  #can be vary depending on the pages
         try:
             request = scrapy.Request(
                 "https://n86t1r3owz-3.algolianet.com/1/indexes/*/queries?x-algolia-agent=Algolia%20for%20JavaScript%20(3.35.1)%3B%20Browser%20(lite)%3B%20JS%20Helper%202.21.1&x-algolia-application-id=N86T1R3OWZ&x-algolia-api-key=5140dac5e87f47346abbda1a34ee70c3",
                 method='post',
                 body=
                 "{\"requests\":[{\"indexName\":\"products\",\"params\":\"query=&hitsPerPage=16&page="
                 + str(i) +
                 "&restrictSearchableAttributes=&facets=%5B%5D&tagFilters=\"}]}",
                 headers={
                     "User-Agent":
                     "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0",
                     "Accept":
                     "application/json",
                     "Accept-Language":
                     "en-US,en;q=0.5",
                     "content-type":
                     "application/x-www-form-urlencoded",
                     "Referer":
                     "https://www.indiehackers.com/products?sorting=recently-updated"
                 })
             response = yield self.crawler.engine.download(request, self)
             json_data = json.loads(response.text)
             results = json_data["results"]
             hits = results[0]["hits"]
             if not hits:
                 break
             keys = [
                 "name", "tagline", "revenue", "revenue_type",
                 "numFollowers", "websiteUrl", "description", "city",
                 "country", "twitterHandle"
             ]
             for product in hits:
                 if ('stripe-verified-revenue' in product["_tags"]):
                     product["revenue_type"] = 'stripe-verified revenue'
                 else:
                     product["revenue_type"] = 'self-reported revenue'
             with open(self.raw_data_file, 'a') as outfile:
                 writer = DictWriter(outfile, keys, extrasaction='ignore')
                 writer.writeheader()
                 writer.writerows(hits)
         except Exception as e:
             print("exception is", e)
Example #58
 def configure(self, config_dict, registry_config_str):
     """
     Set the Interface attributes from the configurations provided by the Platform Driver, and create the "device" if
     it doesn't already exist
     :param config_dict: Dictionary of configuration values passed from the Platform Driver
     :param registry_config_str: String representation of the registry configuration passed from the Platform Driver
     """
     # Set the CSV interface's necessary attributes from the configuration
     self.csv_path = config_dict.get("csv_path", "csv_device.csv")
     # If the configured path doesn't exist, create the CSV "device" file using the global defaults
     # so that we have something to test against
     if not os.path.isfile(self.csv_path):
         _log.info("Creating csv 'device'")
         with open(self.csv_path, "w+") as csv_device:
             writer = DictWriter(csv_device, fieldnames=CSV_FIELDNAMES)
             writer.writeheader()
             writer.writerows(CSV_DEFAULT)
     # Then parse the registry configuration to create our registers
     self.parse_config(registry_config_str)
Example #59
def run_set_output_check() -> None:
    """get the latest all json, pop out the energy values for the last month"""
    r = requests.get(V3_ALL_URL)
    v3 = load_statset(r.json())

    if not v3:
        raise Exception("Failed to get v3 all data")

    check_date = get_last_month()

    logger.info("Checking {}".format(check_date))

    check_result = {
        "run_dt": str(datetime.now()),
        "check_network": "NEM",
        "check_date": str(check_date),
    }

    for i in v3.data:
        if not i.id:
            continue

        if not i.id.endswith("energy"):
            continue

        d = i.history.get_date(check_date)

        if i.fuel_tech and d:
            check_result[i.fuel_tech] = d

    current_records = _get_current_records()

    logger.info("Have {} current records".format(len(current_records)))

    current_records.append(check_result)

    # write to csv string
    output = io.StringIO()
    csv_output = DictWriter(output, fieldnames=CSV_OUTPUT_COLUMNS)
    csv_output.writeheader()
    csv_output.writerows(current_records)

    write_to_s3(output.getvalue(), OUTPUT_THE_CHECK_CSV_PATH, "text/csv")
def _dicts_to_tsv(data_dicts, first_fields, descriptions_dict):
    '''
    >>> data_dicts = [
    ...   # explicit subtitle
    ...   {'title': 'Star Wars', 'subtitle': 'A New Hope', 'date': '1977'},
    ...   # empty subtitle
    ...   {'title': 'The Empire Strikes Back', 'subtitle': '', 'date': '1980'},
    ...   # N/A subtitle
    ...   {'title': 'Return of the Jedi', 'date': '1983'}
    ... ]
    >>> descriptions_dict = {
    ...   'title': 'main title',
    ...   'date': 'date released',
    ...   'extra': 'should be ignored'
    ... }
    >>> lines = _dicts_to_tsv(data_dicts, ['title'], descriptions_dict).split('\\r\\n')
    >>> for line in lines:
    ...   print('| ' + ' | '.join(line.split('\\t')) + ' |')
    | title | date | subtitle |
    | #main title | date released |  |
    | Star Wars | 1977 | A New Hope |
    | The Empire Strikes Back | 1980 |  |
    | Return of the Jedi | 1983 | N/A |
    |  |
    '''
    # collect every field beyond first_fields; missing values are filled with 'N/A' below
    body_fields = sorted(set().union(*[d.keys() for d in data_dicts]) -
                         set(first_fields))
    for dd in data_dicts:
        for field in body_fields:
            if field not in dd:
                dd[field] = 'N/A'
    output = StringIO()
    writer = DictWriter(output,
                        first_fields + body_fields,
                        delimiter='\t',
                        extrasaction='ignore')
    writer.writeheader()
    writer.writerows([descriptions_dict] + data_dicts)
    tsv = output.getvalue()
    tsv_lines = tsv.split('\n')
    tsv_lines[1] = '#' + tsv_lines[1]
    return '\n'.join(tsv_lines)