Example #1
    def save_job_results(geocoder, job_id):
        """
        Download results for completed jobs and save them to S3.
        """
        logging.info('Saving results for %s to S3' % job_id)
        finished_folder = 'geocode_finished_jobs'
        pending_folder = 'geocode_pending_jobs'

        connection = boto.connect_s3()
        bucket = connection.get_bucket(GEO_BUCKET)
        old_key = bucket.get_key('%s/%s' % (pending_folder, job_id))

        new_name = old_key.get_contents_as_string()
        new_key = Key(bucket)
        new_key.key = '%s/%s' % (finished_folder, new_name)

        results = geocoder.get_job_results(job_id)
        result_string = StringIO.StringIO()
        writer = DictWriter(result_string, fieldnames=results[0].keys())
        writer.writeheader()
        writer.writerows(results)
        result_string.seek(0)

        email_address = old_key.get_metadata('email')
        if email_address:
            new_key.set_metadata('email', email_address)
            send_email_notification(
                email_address, geocoder.get_job_statuses(job_id=job_id), new_name, 'finished')

        new_key.set_contents_from_string(result_string.getvalue())
        new_key.make_public()
        old_key.delete()
Example #2
def main(argv):
    uname, pwd, filelist, ifname, ofname = getParms()
    # try opening the files    
    try:
        #scl enable python27 bash
        # to allow multiple openings on one line
        with open(filelist, "rb") as fhl, open(ifname, "rb") as fhi, open(ofname, "wb") as fho:
            # read in the list of filenames to insert
            d = {}
            for line in fhl:
                fname = line.split('/')[-1].rstrip()
                #filename points to folder
                parts = splitFilename(fname)
                crawldir = parts.group(1)
                if crawldir in d:
                    d[crawldir].append(line.rstrip())
                else:
                    d[crawldir] = [line.rstrip()]

            fields = ['identifier','filename','folder','date_created','checksum', \
                      'series_number','creating_body','crawl_start', 'crawl_end', \
                      'filesize', 'unit']
            reader = DictReader(fhi, fieldnames=fields)
            writer = DictWriter(fho, delimiter=',', fieldnames=fields)
            writer.writerow(dict((fn,fn) for fn in fields))
            print "[INFO] Opened files successfully."
            insertFiles(uname, pwd, d, reader, writer)
    except IOError as e:
        print "[IOERROR] " + e
Example #3
def run_queries(session, state):
    query_inputs = term_handler(state)
    
    combinations = cartesian_product(query_inputs)
    
    for query in combinations:
        PARAMS.update(query)
        logger.info('query')
        sleep(SLEEP_TIME)
        page = session.get(url = OB_BASE % SEARCH_URL,
                                params = PARAMS)
        logger.info('got page')
        pricing_data = page.json()
        with open(WRITE_FILE_PATH, 'wb') as output_file:
            fieldnames = pricing_data['PricingRecords'][0].keys()
            fieldnames.append('Scenario')
            print 'FIELDNAMES %s' % fieldnames
            logger.info('header %s' % fieldnames)
            csv_output = DictWriter(output_file, fieldnames=fieldnames)
            csv_output.writeheader()
            for row in pricing_data['PricingRecords']:
                row['Scenario'] = '{msa}|{product}{purpose}{amount}{ltv}{fico}LD30IO0{term}'.format(
                    msa=query_inputs['MSALocation_Index'][query['MSALocation_Index']],
                    product=query_inputs["ProductType"][query["ProductType"]],
                    purpose=query_inputs["Purpose"][query["Purpose"]],
                    amount=query_inputs["LoanAmount"][query["LoanAmount"]],
                    ltv=query_inputs["LTV"][query["LTV"]],
                    fico=query_inputs["FICO"][query["FICO"]],
                    term=query_inputs["Term"][query["Term"]])
                logger.info('adding row %s' % row)
                csv_output.writerow(row)
Example #4
def main(infile, outfile):
    with open(infile) as inf, open(outfile, "w") as outf:
        r = DictReader(inf)
        rows = [process(row) for row in r]
        w = DictWriter(outf, fieldnames=rows[0].keys())
        w.writeheader()
        w.writerows(rows)
Example #5
def run(args):

    opts = parse_args(args)
    opts.prefix = opts.prefix or opts.graph.split('.', 1)[0]

    sheets = {}
    sheet_headers = {}

    try:
        with file(opts.graph) as csvfile:
            for row in reader(csvfile):
                fv = dict(column.split('=', 1) for column in row)
                entity_type = fv.pop('Entity Type')
                headers = fv.keys()
                if entity_type not in sheets:
                    sheets[entity_type] = [fv]
                    sheet_headers[entity_type] = set(headers)
                    continue
                else:
                    sheets[entity_type].append(fv)
                # set.union() returns a new set; update() adds any new headers in place
                sheet_headers[entity_type].update(headers)

        for entity_type in sheets:
            with open('%s_%s.csv' % (opts.prefix, entity_type), 'wb') as csvfile:
                csv = DictWriter(csvfile, sheet_headers[entity_type])
                csv.writeheader()
                csv.writerows(sheets[entity_type])
    except IOError, e:
        print 'csv2sheets: %s' % e
        exit(-1)
Example #6
 def writePredictions(self):
     print "In writePredictions"
     o = DictWriter(open("predictions.csv", "w"), ["id", "position"])
     o.writeheader()
     for ii, pp in zip([x["id"] for x in self.test], self.predictions):
         d = {"id": ii, "position": pp}
         o.writerow(d)
Example #7
    def test_01_importTab(self):
        # First check the collections; create them directly if none exist
        #client.drop_database(DB_INFO['DB_NAME'])
        SD.importTab()
        SD.copy_table()
        self.assertIn('FACT_ATTR', db.collection_names())
        self.assertIn('FACT_SERVICE', db.collection_names())
        self.assertIn('FACT_ATTR_SET', db.collection_names())
        self.assertIn('FACT_SCENE', db.collection_names())

        # If they already exist, check the id: insert when missing, otherwise update
        L = list()
        table = 'FACT_ATTR'
        filepath = os.path.sep.join([os.path.abspath(Const.DATA_IN), table])
        with open(filepath, 'r') as f:
            dReader = DictReader(f)
            L = [i for i in dReader]
        L[-1]['attr_set_id'] = 1
        L[-1]['value'] = 'rampage'
        L.append({'_id': 4, 'name': 'attr4', 'type_id':6, 'value': 'test', 'attr_set_id': 2})
        with open(filepath, 'w') as f:
            titles = L[-1].keys()
            dwriter = DictWriter(f, titles)
            header = dict(zip(titles, titles))
            dwriter.writerow(header)
            dwriter.writerows(L)

        SD.importTab()
        # Test the query here as well, while we're at it
        match = {'_id': {'$in': [3,4]}}
        rs = list(db[table].find(match))
        self.assertEqual(len(rs), 2)
        self.assertEqual(rs[-2]['attr_set_id'], 1)
        self.assertEqual(rs[-2]['value'], 'rampage')
Example #8
 def write_csv(self, array, fname, delimiter=":"):
     with open(fname, "w") as f:
         fieldnames = list(array[0].keys())
         writer = DictWriter(f, delimiter=delimiter, lineterminator="\n", fieldnames=fieldnames)
         writer.writerow(dict((field, field) for field in fieldnames))
         for row in array:
             writer.writerow(row)
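
Since this writer uses a non-default delimiter, reading the file back requires passing the same delimiter to DictReader. A standalone sketch of the round trip, with an invented file name and sample rows (and writeheader() in place of the manual header row):

from csv import DictReader, DictWriter

rows = [{"host": "alpha", "port": "22"}, {"host": "beta", "port": "80"}]

with open("demo.txt", "w", newline="") as f:
    writer = DictWriter(f, delimiter=":", lineterminator="\n", fieldnames=list(rows[0].keys()))
    writer.writeheader()
    writer.writerows(rows)

# DictReader needs the same delimiter, or each line comes back as a single field.
with open("demo.txt", newline="") as f:
    assert list(DictReader(f, delimiter=":")) == rows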
Example #9
def write_report(report):
    f = open(report.filename, 'wb')
    print >>f, ','.join(report.columns)
    writer = DictWriter(f, report.columns)
    for row in report.rows:
        writer.writerow(_encode_row(row))
    f.close()
Example #10
def main(argv):
    mountpoint, ifname, ofname = getParms()
    if not mountpoint.endswith('/'):
        mountpoint = mountpoint  + '/'
    metaname = mountpoint + 'RW_32/metadata_v7.csv'
    # try opening the files    
    try:
        #scl enable python27 bash
        # to allow multiple openings on one line
        with open(metaname, "rb") as mi, open(ifname, "rb") as cl, open(ofname, "wb") as mo:
            fields = ['identifier','filename','folder','date_created','checksum', \
                      'series_number','creating_body','crawl_start', 'crawl_end', \
                      'filesize', 'unit']
            all_fields = fields + ['date_archivist_note', 'archivist_note']
            metareader = DictReader(mi, fieldnames=fields)
            creader = reader(cl)
            # will always be tiny wrt metadata so slurp
            corrupt = {}
            for row in creader:
                corrupt[row[0]] = [row[1], row[2]]
            writer = DictWriter(mo, delimiter=',', fieldnames=all_fields)
            writer.writerow(dict((fn,fn) for fn in all_fields))
            print "[INFO] Opened files successfully."
            modifyMetadata(metareader, corrupt, writer)
    except IOError as e:
        print "[IOERROR] " + e
Example #11
def _stats_data_csv(user_profile, req_input, client, ignored, stats_type, is_custom):

    n_type_keys = {
        'mean': ['start', 'stop', 'service_name', 'mean', 'mean_all_services',
                  'usage_perc_all_services', 'time_perc_all_services', 'all_services_usage', 'mean_trend'],
        'usage': ['start', 'stop', 'service_name', 'usage', 'rate', 'usage_perc_all_services',
                  'time_perc_all_services', 'all_services_usage', 'usage_trend'],
        }

    buff = StringIO()
    writer = DictWriter(buff, n_type_keys[req_input.n_type], extrasaction='ignore')
    writer.writeheader()

    for stat in _get_stats(client, req_input.utc_start, req_input.utc_stop, req_input.n, req_input.n_type, stats_type):
        d = stat.to_dict()
        d['start'] = req_input.user_start
        d['stop'] = req_input.user_stop if stats_type == 'trends' or is_custom else ''
        writer.writerow(d)

    out = buff.getvalue()
    buff.close()

    response = HttpResponse(out, content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename={}'.format('zato-stats.csv')

    return response
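
The extrasaction='ignore' argument above is what allows stat.to_dict() to carry more keys than the CSV needs. A minimal sketch of the behaviour with invented field names:

from csv import DictWriter
from io import StringIO

buff = StringIO()
writer = DictWriter(buff, ['service_name', 'usage'], extrasaction='ignore')
writer.writeheader()
# 'mean' is not in fieldnames: extrasaction='ignore' silently drops it,
# while the default extrasaction='raise' would raise ValueError here.
writer.writerow({'service_name': 'crm.sync', 'usage': 42, 'mean': 0.3})
print(buff.getvalue())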
Example #12
def test_behavior_strategy(b: Behavior, s: Strategy, size=20):
    TRIALS = 10**2
    results = []
    start = time()
    dynamic = False
    for _ in range(TRIALS):
        r = MemoryManager(s, size, dynamic).handle_string(generate_list(b))
        results.append(r)
    end = time()
    avg_time = (end - start)/TRIALS
    print('Average time: ', avg_time)
    print('Minimum no. page faults: ', min(results))
    print('Maximum no. page faults: ', max(results))
    avg = sum(results)/len(results)
    print('Average no. page faults: ', avg)
    with open('benchmarks.csv', 'r') as record_file:
        data = DictReader(record_file)
        entries = [i for i in data]
    entry_fields = ['Behavior', 'Strategy', 'Res. Set Size', 'Faults']
    new_entry = {'Behavior': b.name, 'Strategy': s.name, 'Res. Set Size': size, 'Faults': int(avg)}
    entries.append(new_entry)
    entries = sorted(entries, key=itemgetter('Behavior', 'Strategy'))
    with open('benchmarks.csv', 'w', newline='') as record_file:
        writer = DictWriter(record_file, entry_fields)
        writer.writeheader()
        writer.writerows(entries)
Example #13
def export_feature_matrix_csv(feature_matrix, path, delimiter = ','):
    """
    Save a FeatureMatrix as a column-delimited text file

    Parameters
    ----------
    feature_matrix : FeatureMatrix
        FeatureMatrix to save to text file
    path : str
        Full path to write text file
    delimiter : str
        Character to mark boundaries between columns.  Defaults to ','
    """
    with open(path, encoding='utf-8-sig', mode='w') as f:
        header = ['symbol'] + feature_matrix.features
        writer = DictWriter(f, header,delimiter=delimiter)
        writer.writerow({h: h for h in header})
        for seg in feature_matrix.segments:
            #If FeatureMatrix uses dictionaries
            #outdict = feature_matrix[seg]
            #outdict['symbol'] = seg
            #writer.writerow(outdict)
            if seg in ['#', '']:  # skip boundary and empty symbols
                continue
            featline = feature_matrix.seg_to_feat_line(seg)
            outdict = {header[i]: featline[i] for i in range(len(header))}
            writer.writerow(outdict)
Example #14
def job_result_csv(job_id):
    db_session = db.get_session()
    db_job = db_session.query(PersistentJob).get(job_id)
    if not db_job:
        return json_error('no task exists with id: {0}'.format(job_id))
    celery_task = Job.task.AsyncResult(db_job.result_key)
    if celery_task.ready():
        task_result = celery_task.get()
        
        csv_io = StringIO()
        if task_result:
            # if task_result is not empty find header in first row
            fieldnames = ['user_id'] + sorted(task_result.values()[0].keys())
        else:
            fieldnames = ['user_id']
        writer = DictWriter(csv_io, fieldnames)
        
        task_rows = []
        # fold user_id into dict so we can use DictWriter to escape things
        for user_id, row in task_result.iteritems():
            row['user_id'] = user_id
            task_rows.append(row)
        writer.writeheader()
        writer.writerows(task_rows)
        app.logger.debug('celery task is ready! returning actual result:\n%s', csv_io.getvalue())
        return Response(csv_io.getvalue(), mimetype='text/csv')
    else:
        return json_response(status=celery_task.status)
Example #15
 def csv_results(self, csv_file, histogram_size=None):
     specs = self.token_categories
     names_from_type = {
         spec.typename: spec.name.first_original().value.encode('utf-8') for spec in specs
     }
     spec_names = names_from_type.values()
     spec_names.sort()
     spec_names.insert(0, "idea")
     dw = DictWriter(csv_file, spec_names, dialect='excel', delimiter=';')
     dw.writeheader()
     by_idea = self._gather_results()
     values = {
         votable_id: self.results_for(voting_results)
         for (votable_id, voting_results) in by_idea.iteritems()
     }
     idea_names = dict(self.db.query(Idea.id, Idea.short_title).filter(
         Idea.id.in_(by_idea.keys())))
     idea_names = {
         id: name.encode('utf-8') for (id, name) in idea_names.iteritems()}
     ordered_idea_ids = Idea.visit_idea_ids_depth_first(
         AppendingVisitor(), self.get_discussion_id())
     ordered_idea_ids = [id for id in ordered_idea_ids if id in values]
     for idea_id in ordered_idea_ids:
         base = values[idea_id]
         sums = {names_from_type[k]: v for (k, v) in base['sums'].iteritems()}
         sums['idea'] = idea_names[idea_id]
         dw.writerow(sums)
Example #16
def output_results(poi_result_set, screen=True, outfile=None):
    """
    Outputs unified DBSCAN results to screen or csv file.
    The screen only shows major data elements. The CSV file has the 
    complete dictionary (i.e., base dictionary plus ZOA attributes for each POI)
    """
    assert not isinstance(poi_result_set, basestring), 'POI result set is not list or tuple'

    if screen:
        print "\nZOAs by POI"
        print "="*80,
        for poi in poi_result_set:                
            print "\nLocation:\t%s" % poi[s.NAME_KEY]
            print "Address:\t%s" % poi[s.ADDR_KEY]
            print "Neighborhood:\t%s" % poi[s.NBHD_KEY]
            print "Coordinates:\t%.4f, %.4f" % (poi[s.LAT_KEY], poi[s.LNG_KEY])
            print "ZOA ID:\t\t%d" % poi[s.ZOA_KEY] 
        
    if outfile:
        assert isinstance(outfile, str), "Outfile name is not a string: %r" % outfile
        if outfile[-4:] != '.csv': outfile += '.csv'
        with open(outfile, 'wb') as f:
            target = DictWriter(f, poi_result_set[0].keys())
            target.writeheader()
            target.writerows(poi_result_set)
        print "\nWrote output to %s.\n" % outfile
Example #17
def main():
    '''
        >>> main() # stuff happens
    '''

    args = parse_args()
    logging.basicConfig(filename=args.log, level=logging.INFO)

    input_otu_counts = defaultdict(lambda: defaultdict(lambda: 0))
    field_names = set()

    for input in args.inputs:
        with open(input) as handle:
            kraken_data = parse_kraken_file(handle)

            for row in kraken_data:
                field_names.add(row['ncbi_taxid'])
                input_otu_counts[input][row['ncbi_taxid']] += 1

    field_names = ['input'] + sorted([ i for i in field_names ])

    with open(args.output, 'w') as handle:
        writer = DictWriter(handle,
                            fieldnames=field_names)

        writer.writeheader()

        for input, otu_counts in list(input_otu_counts.items()):
            otu_counts['input'] = input
            writer.writerow(otu_counts)
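
Each otu_counts dict above only holds the taxids seen in that particular input, so columns missing from a row fall back to DictWriter's restval (an empty string by default). A toy sketch with made-up taxid columns, using restval=0 to write explicit zero counts instead:

from csv import DictWriter
from sys import stdout

field_names = ['input', '9606', '562']          # invented taxid columns
writer = DictWriter(stdout, fieldnames=field_names, restval=0)
writer.writeheader()
# '562' is absent from this row, so restval supplies the 0.
writer.writerow({'input': 'sample1.kraken', '9606': 17})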
Example #18
def main():
    layout = construct_layout(OFF_PROPERTY_LAYOUT)
    header = get_active_header(OFF_PROPERTY_LAYOUT)

    # Prepare CSV output to stdout
    writer = DictWriter(stdout, fieldnames=header)
    writer.writeheader()

    parse = Struct(layout).unpack_from
    struct_length = calcsize(layout)

    for line in get_stdin_bytes().readlines():
        # Ensure string length is what deconstructer expects
        if len(line) != struct_length:
            line = '{:<{}s}'.format(line.decode(), struct_length).encode()

        # Deconstruct fixed-width string
        row = parse(line)

        # Decode each value
        row = (v.decode('ascii', 'ignore') for v in row)

        # Trim whitespace in each field
        row = [field.strip() for field in row]

        # Convert to dict using header
        row = dict(zip(header, row))

        writer.writerow(row)
Example #19
def get_vf_precincts(loc_data, precinct_data):
	with open(Files.VF_PRECINCTS.format(**loc_data), "w") as vfp_w, open(Files.VF_EX_PRECINCTS.format(**loc_data), "w") as vfep_w:
		vfp_writer = DictWriter(vfp_w, fieldnames=Headers.VFP)
		vfp_writer.writeheader()
		vfep_writer = DictWriter(vfep_w, fieldnames=Headers.VFEP)
		vfep_writer.writeheader()
		for key, vfp_dict in precinct_data.iteritems():
			zips = vfp_dict.pop('zips')
			max_count = 0
			max_zip = 0
			total_count = 0
			for zip_val, zip_count in zips.iteritems():
				total_count += zip_count
				if zip_count > max_count:
					max_count = zip_count
					max_zip = zip_val
			vfp_dict['vf_precinct_zip'] = max_zip
			vfp_dict['vf_precinct_count'] = total_count
			examples = vfp_dict.pop('examples')
			vfp_writer.writerow(vfp_dict)
			ex_count = 1
			for ex in examples:
				for key in Conversions.VF_EX:
					vfp_dict[Prefixes.VFP_EX.format(ex_count)+key] = ex[key]
				ex_count += 1
			vfep_writer.writerow(vfp_dict)
Example #20
def run():
    output = open(sys.argv[1], 'w')
    writer = DictWriter(output, fieldnames=['uid', 'data'])
    writer.writeheader()
    db = DB(dbconfig)

    for uid in fetch_users(db):
        data = fetch_user_location_logs(uid, db)
        locations = merge_locations(data)
        matrix = generate_matrix(locations)
        semantic_data = fetch_semantic_data(list(matrix.keys()))
        semantic_dict = {}
        for row in semantic_data:
            semantic_dict[row['location']] = clean_tags(row['tags'], 5)
        tag_matrix = {}
        for location, proba in list(matrix.items()):
            tag_dict = semantic_dict[location]
            tag_weight = sum(v for v in list(tag_dict.values()))
            if tag_weight == 0:
                continue
            for tag, cnt in list(tag_dict.items()):
                tag_matrix.setdefault(tag, [0] * 48)
                for i in range(48):
                    tag_matrix[tag][i] += (proba[i] * cnt + 0.001) / (tag_weight + 0.001)
        writer.writerow({
            'uid': uid,
            'data': json.dumps(tag_matrix)
        })
    output.close()
Example #21
def process_vf(loc_data):
	precinct_data = {}
	with open(Files.VF_CUT.format(**loc_data), "r") as r, open(Files.VF_DEDUPED.format(**loc_data), "w") as w:
		reader = DictReader(r, dialect='excel-tab')
		writer = DictWriter(w, fieldnames=Headers.VF_DEDUPED)
		writer.writeheader()
		vf_hashes = set()
		p_count = 0
		for row in reader:
			if len(loc_data['county']) > 0 and not row['vf_county_name'].upper() == loc_data['county'].upper():
				continue
			vf_hash = get_hash(row, HashFields.VF)
			if vf_hash in vf_hashes:
				continue
			vf_hashes.add(vf_hash)
			vfp_hash = get_hash(row, HashFields.VFP)
			row_zip = row['vf_reg_cass_zip']
			if vfp_hash not in precinct_data:
				p_count += 1
				precinct_data[vfp_hash] = get_conversion(row, Conversions.VFP)
				precinct_data[vfp_hash]['vf_precinct_id'] = Prefixes.PRECINCT + str(p_count)
				precinct_data[vfp_hash]['zips'] = {row_zip:1}
				precinct_data[vfp_hash]['examples'] = []
			elif row_zip not in precinct_data[vfp_hash]['zips']:
				precinct_data[vfp_hash]['zips'][row_zip] = 1
			else:
				precinct_data[vfp_hash]['zips'][row_zip] += 1
			vf_output = get_conversion(row, Conversions.VF)
			if len(precinct_data[vfp_hash]['examples']) < 5:
				precinct_data[vfp_hash]['examples'].append(vf_output)
			vf_output["vf_precinct_id"] = precinct_data[vfp_hash]['vf_precinct_id'] 
			vf_output["vf_id"] = str(Prefixes.VF + row["voterbase_id"][3:])
			writer.writerow(vf_output)
	return precinct_data
Example #22
def plot_file(filename1):
    base_name = os.path.basename(filename1)
    name_parts = base_name.split('_')
    work_path = os.path.dirname(__file__)
    scores_filename = os.path.join(
        work_path,
        '_'.join(name_parts[:2] + ['v3loop_scores.csv']))
    if os.path.exists(scores_filename):
        with open(scores_filename) as f:
            reader = DictReader(f)
            score_rows = [list(map(int, row))
                          for row in map(itemgetter('score', 'count'), reader)]
    else:
        source1 = os.path.join('micall/tests/working/v3loop_alignment_scores/',
                               filename1)
        source2 = source1.replace('_R1_', '_R2_')
        start = datetime.now()
        with open(source1) as fastq1, open(source2) as fastq2:
            score_counts = align_reads(fastq1, fastq2)
        print('{}: {}'.format(datetime.now() - start, filename1))
        score_rows = sorted(score_counts.items())
        with open(scores_filename, 'w') as scores_csv:
            writer = DictWriter(scores_csv,
                                ('score', 'count'),
                                lineterminator=os.linesep)
            writer.writeheader()
            for score, count in score_rows:
                writer.writerow(dict(score=score, count=count))
    scores = [row[0] for row in score_rows]
    counts = [row[1] for row in score_rows]
    total_count = float(sum(counts))
    fractions = [count/total_count for count in counts]
    plt.plot(scores, fractions, label=base_name.split('_')[0], alpha=0.7)
Example #23
def dump_csv(pages, options):
    """Dump in CSV format.

    ``pages`` is an iterable of (field, value) tuples.

    It's assumed that the same fields are used in each tuple.
    """
    from itertools import chain
    from csv import DictWriter
    from sys import stdout
    pages = iter(pages)
    try:
        first_row = next(pages)
    except StopIteration:
        return
    fields = [item[0] for item in first_row]
    rows = chain((first_row,), pages)
    dicts = (dict(page) for page in rows)
    dicts = (process_page(row) for row in dicts)

    def validate_row_length(row_dict):
        if len(row_dict) != len(fields):
            raise DataValidationError(
                'Inconsistent number of fields in row {0}.\n'
                'Fields: {1}'.format(row_dict, fields))
        return row_dict
    dicts = (validate_row_length(row) for row in dicts)

    writer = DictWriter(stdout, fields, dialect='excel-tab')
    writer.writerow(dict((v, v) for v in fields))
    writer.writerows(dicts)
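
A self-contained sketch of the input shape dump_csv expects, with process_page and the length validation left out and the page data invented:

from csv import DictWriter
from sys import stdout

# Each "page" is a sequence of (field, value) tuples sharing the same fields.
pages = [
    [('title', 'Home'), ('views', 10)],
    [('title', 'About'), ('views', 3)],
]
fields = [field for field, _ in pages[0]]

writer = DictWriter(stdout, fields, dialect='excel-tab')
writer.writerow(dict((v, v) for v in fields))   # header row, as in dump_csv
writer.writerows(dict(page) for page in pages)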
Example #24
def convert_powertracker_log_to_csv(path):
    """
    This function creates a CSV file (to ./results) from a PowerTracker log file (from ./data).
    This is inspired by https://github.com/sieben/makesense/blob/master/makesense/parser.py.

    :param path: path to the experiment (including [with-|without-malicious])
    """
    platforms = [p.capitalize() for p in get_available_platforms()]
    data, results = join(path, 'data'), join(path, 'results')
    with open(join(data, 'powertracker.log')) as f:
        log = f.read()
    iterables, fields = [], ['mote_id']
    for it in PT_ITEMS:
        time_field = '{}_time'.format(it)
        iterables.append(finditer(PT_REGEX.format('|'.join(platforms), it.upper(), time_field), log, MULTILINE))
        fields.append(time_field)
    with open(join(results, 'powertracker.csv'), 'w') as f:
        writer = DictWriter(f, delimiter=',', fieldnames=fields)
        writer.writeheader()
        for matches in zip(*iterables):
            row = {}
            for m in matches:
                row.update((k, int(v)) for k, v in m.groupdict().items())
            for it in PT_ITEMS:
                time_field = '{}_time'.format(it)
                row[time_field] = row[time_field] / 10.0 ** 6  # true division (avoids integer truncation)
            writer.writerow(row)
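
The core pattern above, where named regex groups from m.groupdict() become DictWriter columns, also works on its own; a toy sketch with an invented log format:

from csv import DictWriter
from io import StringIO
from re import finditer, MULTILINE

log = "mote 1 ON 120 us\nmote 2 ON 450 us\n"

out = StringIO()
writer = DictWriter(out, fieldnames=['mote_id', 'on_time'])
writer.writeheader()
for m in finditer(r'^mote (?P<mote_id>\d+) ON (?P<on_time>\d+) us$', log, MULTILINE):
    writer.writerow(m.groupdict())   # named groups map directly onto the columns
print(out.getvalue())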
Example #25
def main(args):
    with open("users.csv", "w") as f:
        fieldnames = ["username", "first_name", "last_name", "email", "phone", "institution", "voro_account"]
        csvFile = DictWriter(f, fieldnames=fieldnames, extrasaction="ignore")
        # write header row
        field_dict = dict([(x, x.capitalize()) for x in fieldnames])
        csvFile.writerow(field_dict)
        for user in User.objects.all():
            # look up associated profile & inst
            try:
                profile = user.get_profile()
                phone = profile.phone
                voro_account = profile.voroEAD_account
            except UserProfile.DoesNotExist:
                phone = ""
                voro_account = False
            user.__dict__["phone"] = phone
            user.__dict__["voro_account"] = voro_account
            # inst through group
            groups = user.groups.all()
            instname = ""
            if len(groups):
                firstgroup = user.groups.all()[0]
                grpprofile = firstgroup.groupprofile
                insts = grpprofile.institutions.all()
                if len(insts):
                    instname = insts[0].name
                else:
                    instname = ""
            user.__dict__["institution"] = instname.encode("utf-8")
            csvFile.writerow(user.__dict__)
Example #26
    def handle(self, *args, **options):
        print('Start.')
        sampled_comments = []
        feeds = Facebook_Feed.objects.all()
        from_date = dateutil.parser.parse(options['from_date'])
        to_date = dateutil.parser.parse(options['to_date'])
        sample_size = options['sample_size']
        batch_size = options['batch_size']

        for i, feed in enumerate(feeds):
            print('working on feed {} of {}'.format(i + 1, feeds.count()))
            residual = None
            sampled_comments_for_feed = []
            statuses_for_feed = Facebook_Status.objects.filter(feed__id=feed.id).filter(
                published__range=[from_date, to_date]).order_by(
                'comment_count')

            for i, status in enumerate(statuses_for_feed):
                is_last = i + 1 == len(statuses_for_feed)
                samples_for_status, residual = self.sample(status, is_last, residual, sample_size=sample_size,
                                                           batch_size=batch_size)
                sampled_comments_for_feed += samples_for_status

            sampled_comments += sampled_comments_for_feed
        print('total_comments:', len(sampled_comments))
        with open('{}.csv'.format(args[0]), 'wb') as f:
            fieldnames = ['comment_id', 'status_id']
            writer = DictWriter(f, fieldnames=fieldnames)
            writer.writerow({'comment_id': 'comment_id', 'status_id': 'status_id'})
            for row in sampled_comments:
                writer.writerow({'comment_id': row['comment_id'], 'status_id': row['parent__status_id']})
        print('Done.')
Example #27
    def handle_noargs(self, **options):
        print "in the command..."

        comment_keys = ('user_key', 'g', 'agroup', 'user_key', 'experiment_slug', 'variant', 'via')

        petition_headers = comment_keys + ('name', 'email')
        # Petition signatories from the first two experiments
        for filename, url_path in [
            ('petition-1.csv', '/county-performance/petition'),
            ('petition-2.csv', '/county-performance-2/petition'),
        ]:
            with open(filename, "wb") as f:
                writer = DictWriter(f, petition_headers)
                writer.writeheader()
                for f in Feedback.objects.filter(url__endswith=url_path):
                    data, comment = unpack_comment(f.comment)
                    row_data = data.copy()
                    row_data['name'] = comment
                    row_data['email'] = f.email
                    writer.writerow(row_data)

        senate_headers = comment_keys + ('comment',)
        for filename, url_path in [
            ('senate-1.csv', '/county-performance/senate'),
            ('senate-2.csv', '/county-performance-2/senate'),
        ]:
            with open(filename, "wb") as f:
                writer = DictWriter(f, senate_headers)
                writer.writeheader()
                for f in Feedback.objects.filter(url__endswith=url_path):
                    data, comment = unpack_comment(f.comment)
                    row_data = data.copy()
                    row_data['comment'] = comment
                    writer.writerow(row_data)
Example #28
 def writePredictions(self):
     print "In writePredictions"
     o = DictWriter(open("predictions.csv", 'w'), ["id", "position"])
     o.writeheader()
     for ii, pp in zip([x['id'] for x in self.test], self.predictions):
         d = {'id': ii, 'position': pp}
         o.writerow(d)
Example #29
def output_list(to_save,path):
    head = to_save[0].keys()
    with open(path,'w') as f:
        csvwriter = DictWriter(f,head,delimiter='\t')
        csvwriter.writerow({x: x for x in head})
        for l in to_save:
            csvwriter.writerow(l)
Example #30
def make_classify_report_from_lima(clips_filename,
                                   primer_index_dict,
                                   flnc_bam=None):
    """
    clips format:

    """
    if flnc_bam is not None:
        flnc_len_dict = dict(
            (r.qname, r.qlen) for r in pysam.Samfile(flnc_bam, check_sq=False))
    else:
        flnc_len_dict = None
        print >> sys.stderr, "WARNING: FLNC BAM not provided. `polyAlen` and `insertlen` fields will be `NA`."

    f = open('classify_report.csv', 'w')
    writer = DictWriter(f, fieldnames=report_fields, delimiter=',')
    writer.writeheader()
    first_of_pair_seen = False
    rec = {
        'id': None,
        'strand': None,
        'fivelen': None,
        'threelen': None,
        'polyAlen': None,
        'insertlen': None,
        'primer_index': None,
        'primer': None
    }

    for r in SeqIO.parse(open(clips_filename), 'fasta'):
        m = clip_rex.match(r.description)
        zmw = m.group(1) + '/ccs'
        s, e = int(m.group(2)), int(m.group(3))
        bc = int(m.group(4))

        if primer_index_dict[bc][0] == '5p':
            p5 = bc
            rec['fivelen'], start5, end5 = e - s, s, e
        else:
            assert primer_index_dict[bc][0] == '3p'
            p3 = bc
            rec['threelen'], start3, end3 = e - s, s, e

        if first_of_pair_seen:  # both pairs seen, write out and reset
            assert rec['id'] == zmw
            rec['strand'] = '+' if end5 < end3 else '-'
            if flnc_len_dict is None or zmw not in flnc_len_dict:
                rec['insertlen'] = 'NA'
                rec['polyAlen'] = 'NA'
            else:
                rec['insertlen'] = flnc_len_dict[zmw]
                if rec['strand'] == '+':
                    rec['polyAlen'] = start3 - end5 - rec['insertlen']
                else:
                    rec['polyAlen'] = start5 - end3 - rec['insertlen']

            rec['primer'] = "{0}--{1}".format(primer_index_dict[p5][1],
                                              primer_index_dict[p3][1])
            rec['primer_index'] = "{0}--{1}".format(p5, p3)
            writer.writerow(rec)
            #f.write("{id},{len5},{len3},{lenA},{lenI},{pn5}--{pn3},{p5}--{p3}\n".format(\
            #    id=zmw, len5=len5, len3=len3,
            #    lenA=lenA, lenI=lenI,
            #    pn5=primer_index_dict[p5][1], pn3=primer_index_dict[p3][1],
            #    p5=p5, p3=p3))

            # reset variables
            first_of_pair_seen = False
            rec = {
                'id': None,
                'strand': None,
                'fivelen': None,
                'threelen': None,
                'polyAlen': None,
                'insertlen': None,
                'primer_index': None,
                'primer': None
            }
            p5, p3, start5, end5, start3, end3 = None, None, None, None, None, None
        else:  # first of the pair
            rec['id'] = zmw
            first_of_pair_seen = True
    f.close()
    print >> sys.stderr, "Classify report written to: {0}".format(f.name)
Example #31
def run_all():
    parser = argparse.ArgumentParser()
    parser.add_argument('--point_map_file', '-p', default='point_map.csv')
    parser.add_argument('--nghds_outfile',
                        default='outputs/nghds_num_tweets_by_hour.csv')
    parser.add_argument('--bins_outfile',
                        default='outputs/bins_num_tweets_by_hour.csv')
    args = parser.parse_args()

    point_nghd_map = load_point_nghd_map(args.point_map_file)

    bin_hr_counts = Counter()  # (lat, lon, hr) -> count
    nghd_hr_counts = Counter()  # (nghd, hr) -> count

    ctr = 0
    for tweet in db.tweet_pgh.find():
        ctr += 1
        if ctr % 1000 == 0:
            print '%d tweets processed' % ctr
        lat = tweet['coordinates']['coordinates'][1]
        lon = tweet['coordinates']['coordinates'][0]
        (lat, lon) = (round(float(lat), 3), round(float(lon), 3))
        if (lat, lon) in point_nghd_map:
            nghd = point_nghd_map[(lat, lon)]
        else:
            nghd = 'Outside Pittsburgh'

        hr = util.util.get_tweet_hour(tweet)
        bin_hr_counts[(lat, lon, hr)] += 1
        nghd_hr_counts[(nghd, hr)] += 1

    pprint.pprint(nghd_hr_counts)
    bin_writer = DictWriter(open(args.bins_outfile, 'w'),
                            ['lat', 'lon', 'hour', 'count'])
    bin_writer.writeheader()
    for bin_hr, count in bin_hr_counts.items():
        bin_writer.writerow({
            'lat': bin_hr[0],
            'lon': bin_hr[1],
            'hour': bin_hr[2],
            'count': count
        })

    nghd_writer = DictWriter(open(args.nghds_outfile, 'w'),
                             ['nghd', 'hour', 'count'])
    nghd_writer.writeheader()
    for nghd_hr, count in nghd_hr_counts.items():
        nghd_writer.writerow({
            'nghd': nghd_hr[0],
            'hour': nghd_hr[1],
            'count': count
        })
Example #32
from csv import DictWriter
with open('final.csv', 'w') as f:
    csv_writer = DictWriter(f, fieldnames=['first_name', 'last_name', 'age'])
    csv_writer.writerow({
        'first_name': 'sffssd',
        'last_name': 'dfgh',
        'age': 500
    })
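
As written, this snippet produces a single data row with no header line. A sketch of the more common pattern, adding writeheader() and newline='' (same made-up values):

from csv import DictWriter

# newline='' keeps the csv module from inserting blank lines on Windows.
with open('final.csv', 'w', newline='') as f:
    csv_writer = DictWriter(f, fieldnames=['first_name', 'last_name', 'age'])
    csv_writer.writeheader()
    csv_writer.writerow({
        'first_name': 'sffssd',
        'last_name': 'dfgh',
        'age': 500
    })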
Example #33
def main():
    # parse the command line arguments
    arg_parser = ConfigArgumentParser(description=__doc__)

    arg_parser.add_argument("device_id",
                            type=int,
                            help="Device ID of the target device")

    arg_parser.add_argument(
        "--address",
        help=
        "Address of target device, may be needed to help route initial request to device."
    )

    arg_parser.add_argument("--out-file",
                            type=argparse.FileType('wb'),
                            help="Optional output file for configuration",
                            default=sys.stdout)

    arg_parser.add_argument(
        "--max_range_report",
        nargs='?',
        type=float,
        help=
        'Affects how very large numbers are reported in the "Unit Details" column of the output. '
        'Does not affect driver behavior.',
        default=1.0e+20)

    args = arg_parser.parse_args()

    _log.debug("initialization")
    _log.debug("    - args: %r", args)

    # make a device object
    this_device = LocalDeviceObject(
        objectName=args.ini.objectname,
        objectIdentifier=int(args.ini.objectidentifier),
        maxApduLengthAccepted=int(args.ini.maxapdulengthaccepted),
        segmentationSupported=args.ini.segmentationsupported,
        vendorIdentifier=int(args.ini.vendoridentifier),
    )

    # make a simple application
    this_application = SynchronousApplication(this_device, args.ini.address)

    _log.debug("starting build")

    result = get_iam(this_application, args.device_id, args.address)

    #     request = WhoIsRequest()
    #     request.pduDestination = target_address
    #     result = this_application.make_request(request, expect_confirmation = False)

    #     if not isinstance(result, IAmRequest):
    #         result.debug_contents()
    #         raise TypeError("Error making WhoIs request, try running again.")

    #     device_type, device_instance = result.iAmDeviceIdentifier
    #     if device_type != 'device':
    #         raise DecodingError("invalid object type")

    target_address = result.pduSource

    _log.debug('pduSource = ' + repr(result.pduSource))
    _log.debug('iAmDeviceIdentifier = ' + str(result.iAmDeviceIdentifier))
    _log.debug('maxAPDULengthAccepted = ' + str(result.maxAPDULengthAccepted))
    _log.debug('segmentationSupported = ' + str(result.segmentationSupported))
    _log.debug('vendorID = ' + str(result.vendorID))

    device_id = result.iAmDeviceIdentifier[1]

    try:
        device_name = read_prop(this_application, target_address, "device",
                                device_id, "objectName")
        _log.debug('device_name = ' + str(device_name))
    except TypeError:
        _log.debug('device missing objectName')

    try:
        device_description = read_prop(this_application, target_address,
                                       "device", device_id, "description")
        _log.debug('description = ' + str(device_description))
    except TypeError:
        _log.debug('device missing description')

    config_writer = DictWriter(
        args.out_file, ('Reference Point Name', 'Volttron Point Name', 'Units',
                        'Unit Details', 'BACnet Object Type', 'Property',
                        'Writable', 'Index', 'Write Priority', 'Notes'))

    config_writer.writeheader()

    try:
        objectCount = read_prop(this_application,
                                target_address,
                                "device",
                                device_id,
                                "objectList",
                                index=0)
        list_property = "objectList"
    except TypeError:
        objectCount = read_prop(this_application,
                                target_address,
                                "device",
                                device_id,
                                "structuredObjectList",
                                index=0)
        list_property = "structuredObjectList"

    _log.debug('objectCount = ' + str(objectCount))

    for object_index in xrange(1, objectCount + 1):
        _log.debug('object_device_index = ' + repr(object_index))

        bac_object = read_prop(this_application,
                               target_address,
                               "device",
                               device_id,
                               list_property,
                               index=object_index)

        obj_type, index = bac_object

        try:
            process_object(this_application, target_address, obj_type, index,
                           args.max_range_report, config_writer)
        except:
            _log.debug("Unexpected error processing object: {} {}".format(
                obj_type, index))
            _log.debug(traceback.format_exc())
Example #34
 def csv_results(self, csv_file, histogram_size=None):
     histogram_size = histogram_size or 10
     bins = range(histogram_size)
     bins.insert(0, "idea")
     bins.extend(["avg", "std_dev"])
     dw = DictWriter(csv_file, bins, dialect='excel', delimiter=';')
     dw.writeheader()
     by_idea = self._gather_results()
     values = {
         votable_id: self.results_for(voting_results, histogram_size)
         for (votable_id, voting_results) in by_idea.iteritems()
     }
     idea_names = dict(self.db.query(Idea.id, Idea.short_title).filter(
         Idea.id.in_(by_idea.keys())))
     idea_names = {
         id: name.encode('utf-8') for (id, name) in idea_names.iteritems()}
     ordered_idea_ids = Idea.visit_idea_ids_depth_first(
         AppendingVisitor(), self.get_discussion_id())
     ordered_idea_ids = [id for id in ordered_idea_ids if id in values]
     for idea_id in ordered_idea_ids:
         base = values[idea_id]
         r = dict(enumerate(base['histogram']))
         r['idea'] = idea_names[idea_id]
         r['avg'] = base['avg']
         r['std_dev'] = base['std_dev']
         dw.writerow(r)