Example #1
def save_results(results, path, header = None, mode = 'w'):
    """
    Write results to the specified path.

    Parameters
    ----------
    results : dict
        the results to write
    path : str
        the path to the output file
    header : list, optional
        the column header; defaults to None, in which case it is inferred
        from the results
    mode : str
        defaults to 'w' (write); can be 'a' (append)
    """
    if header is None:
        try:
            header = results.keys()
        except AttributeError:
            try:
                header = results[0].keys()
            except AttributeError:
                raise Exception('Could not get the column header from the list; please specify the header.')
    with open(path, mode, encoding = 'utf8') as f:
        writer = DictWriter(f, header)
        if mode != 'a':
            writer.writeheader()
        for line in results:
            writer.writerow({k: make_safe(line[k], '/') for k in header})
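
For reference, the core pattern the example above builds on (inferring the column header from the first record, then writing dict rows) reduces to a small, self-contained sketch; the people data and demo.csv path are invented for illustration:

from csv import DictWriter

people = [
    {'name': 'Ada', 'language': 'Python'},
    {'name': 'Linus', 'language': 'C'},
]

# Take the header from the keys of the first row, the same fallback
# save_results() uses when no explicit header is passed.
header = list(people[0].keys())

with open('demo.csv', 'w', encoding='utf8', newline='') as f:
    writer = DictWriter(f, fieldnames=header)
    writer.writeheader()
    writer.writerows(people)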
Example #2
    def save_job_results(geocoder, job_id):
        """
        Download results for completed jobs and save them to S3.
        """
        logging.info('Saving results for %s to S3' % job_id)
        finished_folder = 'geocode_finished_jobs'
        pending_folder = 'geocode_pending_jobs'

        connection = boto.connect_s3()
        bucket = connection.get_bucket(GEO_BUCKET)
        old_key = bucket.get_key('%s/%s' % (pending_folder, job_id))

        new_name = old_key.get_contents_as_string()
        new_key = Key(bucket)
        new_key.key = '%s/%s' % (finished_folder, new_name)

        results = geocoder.get_job_results(job_id)
        result_string = StringIO.StringIO()
        writer = DictWriter(result_string, fieldnames=results[0].keys())
        writer.writeheader()
        writer.writerows(results)
        result_string.seek(0)

        email_address = old_key.get_metadata('email')
        if email_address:
            new_key.set_metadata('email', email_address)
            send_email_notification(
                email_address, geocoder.get_job_statuses(job_id=job_id), new_name, 'finished')

        new_key.set_contents_from_string(result_string.getvalue())
        new_key.make_public()
        old_key.delete()
Example #3
    def write_rows(self, entries):
        """Write rows in the csv file"""

        with open(self.output_filename, 'w') as output_file:
            writer = DictWriter(output_file, FIELDNAMES)
            writer.writeheader()
            writer.writerows(entries)
Example #4
def run_queries(session, state):

                           
    query_inputs = term_handler(state)
    
    combinations = cartesian_product(query_inputs)
    
    for query in combinations:
        PARAMS.update(query)
        logger.info('query')
        sleep(SLEEP_TIME)
        page = session.get(url = OB_BASE % SEARCH_URL,
                                params = PARAMS)
        logger.info('got page')
        pricing_data = page.json()
        with open(WRITE_FILE_PATH, 'wb') as output_file:
            fieldnames = pricing_data['PricingRecords'][0].keys()
            fieldnames.append('Scenario')
            print 'FIELDNAMES %s' % fieldnames
            logger.info('header %s' % fieldnames)
            csv_output = DictWriter(output_file, fieldnames=fieldnames)
            csv_output.writeheader()
            for row in pricing_data['PricingRecords']:
                row['Scenario'] = '{msa}|{product}{purpose}{amount}{ltv}{fico}LD30IO0{term}'.format(msa=query_inputs['MSALocation_Index'][query['MSALocation_Index']],
                                                                                                    product=query_inputs["ProductType"][query["ProductType"]],
                                                                                                    purpose=query_inputs["Purpose"][query["Purpose"]],
                                                                                                    amount=query_inputs["LoanAmount"][query["LoanAmount"]],
                                                                                                    ltv=query_inputs["LTV"][query["LTV"]],
                                                                                                    fico=query_inputs["FICO"][query["FICO"]],
                                                                                                    term=query_inputs["Term"][query["Term"]])
                
        
                logger.info('adding row %s' % row)
                csv_output.writerow(row)
Example #5
 def csv_results(self, csv_file, histogram_size=None):
     specs = self.token_categories
     names_from_type = {
         spec.typename: spec.name.first_original().value.encode('utf-8') for spec in specs
     }
     spec_names = names_from_type.values()
     spec_names.sort()
     spec_names.insert(0, "idea")
     dw = DictWriter(csv_file, spec_names, dialect='excel', delimiter=';')
     dw.writeheader()
     by_idea = self._gather_results()
     values = {
         votable_id: self.results_for(voting_results)
         for (votable_id, voting_results) in by_idea.iteritems()
     }
     idea_names = dict(self.db.query(Idea.id, Idea.short_title).filter(
         Idea.id.in_(by_idea.keys())))
     idea_names = {
         id: name.encode('utf-8') for (id, name) in idea_names.iteritems()}
     ordered_idea_ids = Idea.visit_idea_ids_depth_first(
         AppendingVisitor(), self.get_discussion_id())
     ordered_idea_ids = [id for id in ordered_idea_ids if id in values]
     for idea_id in ordered_idea_ids:
         base = values[idea_id]
         sums = {names_from_type[k]: v for (k, v) in base['sums'].iteritems()}
         sums['idea'] = idea_names[idea_id]
         dw.writerow(sums)
Example #6
def main():
    print "Collecting tweets for {track}".format(**query.track)
    tweets = get_twitters(query.twitter_url, parameters=query.track)

    # the filename is set in the query.py settings
    # write headers if the file does not exist
    write_headers = True
    write_opts = 'wb'

    if os.path.isfile(query.filename):
        write_headers = False
        write_opts = 'ab'
    csv_writer = None

    for tweet in tweets:
        tweet = json.loads(tweet)
        #todo: add the csv writer and json to row
        row = flatten_json(tweet)

        # set up the CSV writer if it does not exist yet
        if csv_writer is None:
            csv_writer = DictWriter(open(query.filename, write_opts), fieldnames=row.keys(), quoting=QUOTE_MINIMAL)
        # write header row
        if write_headers:
            csv_writer.writeheader()
            write_headers = False
        csv_writer.writerow(row)
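
The snippet depends on a flatten_json() helper that is not shown here. A rough, hypothetical equivalent of the overall idea, flattening a nested dict and streaming it into a DictWriter, might look like this (the tweet dict is invented):

from csv import DictWriter, QUOTE_MINIMAL

def flatten(d, parent_key='', sep='.'):
    """Flatten nested dicts into a single level, joining keys with `sep`."""
    items = {}
    for key, value in d.items():
        new_key = parent_key + sep + key if parent_key else key
        if isinstance(value, dict):
            items.update(flatten(value, new_key, sep=sep))
        else:
            items[new_key] = value
    return items

tweet = {'id': 1, 'user': {'name': 'someone', 'followers': 42}, 'text': 'hi'}
row = flatten(tweet)

with open('tweets.csv', 'w', newline='') as f:
    writer = DictWriter(f, fieldnames=row.keys(), quoting=QUOTE_MINIMAL)
    writer.writeheader()
    writer.writerow(row)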
Example #7
def main():
    '''
        >>> main() # stuff happens
    '''

    args = parse_args()
    logging.basicConfig(filename=args.log, level=logging.INFO)

    input_otu_counts = defaultdict(lambda: defaultdict(lambda: 0))
    field_names = set()

    for input in args.inputs:
        with open(input) as handle:
            kraken_data = parse_kraken_file(handle)

            for row in kraken_data:
                field_names.add(row['ncbi_taxid'])
                input_otu_counts[input][row['ncbi_taxid']] += 1

    field_names = ['input'] + sorted([ i for i in field_names ])

    with open(args.output, 'w') as handle:
        writer = DictWriter(handle,
                            fieldnames=field_names)

        writer.writeheader()

        for input, otu_counts in list(input_otu_counts.items()):
            otu_counts['input'] = input
            writer.writerow(otu_counts)
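
Since different inputs can contain different taxids, some rows are missing columns; DictWriter's restval argument controls what goes into those empty cells. A minimal illustration with invented counts:

from csv import DictWriter

rows = [
    {'input': 'a.kraken', '9606': 10},
    {'input': 'b.kraken', '562': 3},          # no '9606' column here
]

with open('otu_counts.csv', 'w', newline='') as f:
    writer = DictWriter(f, fieldnames=['input', '562', '9606'], restval=0)
    writer.writeheader()
    writer.writerows(rows)   # missing cells are written as 0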
Example #8
def main():
    layout = construct_layout(OFF_PROPERTY_LAYOUT)
    header = get_active_header(OFF_PROPERTY_LAYOUT)

    # Prepare CSV output to stdout
    writer = DictWriter(stdout, fieldnames=header)
    writer.writeheader()

    parse = Struct(layout).unpack_from
    struct_length = calcsize(layout)

    for line in get_stdin_bytes().readlines():
        # Ensure the string length is what the struct parser expects
        if len(line) != struct_length:
            line = '{:<{}s}'.format(line.decode(), struct_length).encode()

        # Deconstruct fixed-width string
        row = parse(line)

        # Decode each value
        row = (v.decode('ascii', 'ignore') for v in row)

        # Trim whitespace in each field
        row = [field.strip() for field in row]

        # Convert to dict using header
        row = dict(zip(header, row))

        writer.writerow(row)
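
The fixed-width deconstruction above boils down to struct.Struct.unpack_from plus a zip with the header. A self-contained sketch using an invented two-field layout:

from csv import DictWriter
from struct import Struct, calcsize
from sys import stdout

layout = '10s5s'                 # invented layout: 10-byte name, 5-byte code
header = ['name', 'code']
parse = Struct(layout).unpack_from

line = b'Springfld 12345'        # a padded, fixed-width record
assert len(line) == calcsize(layout)

values = [v.decode('ascii', 'ignore').strip() for v in parse(line)]
row = dict(zip(header, values))

writer = DictWriter(stdout, fieldnames=header)
writer.writeheader()
writer.writerow(row)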
Example #9
 def writePredictions(self):
     print "In writePredictions"
     o = DictWriter(open("predictions.csv", 'w'), ["id", "position"])
     o.writeheader()
     for ii, pp in zip([x['id'] for x in self.test], self.predictions):
         d = {'id': ii, 'position': pp}
         o.writerow(d)
Example #10
def test_behavior_strategy(b: Behavior, s: Strategy, size=20):
    TRIALS = 10**2
    results = []
    start = time()
    dynamic = False
    for _ in range(TRIALS):
        r = MemoryManager(s, size, dynamic).handle_string(generate_list(b))
        results.append(r)
    end = time()
    avg_time = (end - start)/TRIALS
    print('Average time: ', avg_time)
    print('Minimum no. page faults: ', min(results))
    print('Maximum no. page faults: ', max(results))
    avg = sum(results)/len(results)
    print('Average no. page faults: ', avg)
    with open('benchmarks.csv', 'r') as record_file:
        data = DictReader(record_file)
        entries = [i for i in data]
    entry_fields = ['Behavior', 'Strategy', 'Res. Set Size', 'Faults']
    new_entry = {'Behavior': b.name, 'Strategy': s.name, 'Res. Set Size': size, 'Faults': int(avg)}
    entries.append(new_entry)
    entries = sorted(entries, key=itemgetter('Behavior', 'Strategy'))
    with open('benchmarks.csv', 'w', newline='') as record_file:
        writer = DictWriter(record_file, entry_fields)
        writer.writeheader()
        writer.writerows(entries)
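
The read-then-rewrite pattern used for benchmarks.csv can be shown on its own; the file is assumed to already exist with a header row, and the columns and new_entry values below are placeholders:

from csv import DictReader, DictWriter
from operator import itemgetter

fields = ['Behavior', 'Strategy', 'Faults']
new_entry = {'Behavior': 'RANDOM', 'Strategy': 'LRU', 'Faults': 12}

# Load the existing rows.
with open('benchmarks.csv', 'r', newline='') as f:
    entries = list(DictReader(f))

entries.append(new_entry)
entries.sort(key=itemgetter('Behavior', 'Strategy'))

# Rewrite the whole file, header included.
with open('benchmarks.csv', 'w', newline='') as f:
    writer = DictWriter(f, fieldnames=fields)
    writer.writeheader()
    writer.writerows(entries)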
Example #11
def convert_powertracker_log_to_csv(path):
    """
    This function creates a CSV file (to ./results) from a PowerTracker log file (from ./data).
    This is inspired by https://github.com/sieben/makesense/blob/master/makesense/parser.py.

    :param path: path to the experiment (including [with-|without-malicious])
    """
    platforms = [p.capitalize() for p in get_available_platforms()]
    data, results = join(path, 'data'), join(path, 'results')
    with open(join(data, 'powertracker.log')) as f:
        log = f.read()
    iterables, fields = [], ['mote_id']
    for it in PT_ITEMS:
        time_field = '{}_time'.format(it)
        iterables.append(finditer(PT_REGEX.format('|'.join(platforms), it.upper(), time_field), log, MULTILINE))
        fields.append(time_field)
    with open(join(results, 'powertracker.csv'), 'w') as f:
        writer = DictWriter(f, delimiter=',', fieldnames=fields)
        writer.writeheader()
        for matches in zip(*iterables):
            row = {}
            for m in matches:
                row.update((k, int(v)) for k, v in m.groupdict().items())
            for it in PT_ITEMS:
                time_field = '{}_time'.format(it)
                row[time_field] = float(row[time_field] / 10 ** 6)
            writer.writerow(row)
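
A reduced sketch of the same idea, pulling named regex groups out of a log and emitting one CSV row per match, with an invented log format:

import re
from csv import DictWriter
from sys import stdout

log = """mote 1 MONITORED 1200000 us
mote 2 MONITORED 3400000 us"""

pattern = re.compile(r'mote (?P<mote_id>\d+) MONITORED (?P<monitored_time>\d+) us')

writer = DictWriter(stdout, fieldnames=['mote_id', 'monitored_time'])
writer.writeheader()
for match in pattern.finditer(log):
    row = {k: int(v) for k, v in match.groupdict().items()}
    row['monitored_time'] = row['monitored_time'] / 10 ** 6   # microseconds to seconds
    writer.writerow(row)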
Example #12
def job_result_csv(job_id):
    db_session = db.get_session()
    db_job = db_session.query(PersistentJob).get(job_id)
    if not db_job:
        return json_error('no task exists with id: {0}'.format(job_id))
    celery_task = Job.task.AsyncResult(db_job.result_key)
    if celery_task.ready():
        task_result = celery_task.get()
        
        csv_io = StringIO()
        if task_result:
            # if task_result is not empty, derive the header from its first row
            fieldnames = ['user_id'] + sorted(task_result.values()[0].keys())
        else:
            fieldnames = ['user_id']
        writer = DictWriter(csv_io, fieldnames)
        
        task_rows = []
        # fold user_id into dict so we can use DictWriter to escape things
        for user_id, row in task_result.iteritems():
            row['user_id'] = user_id
            task_rows.append(row)
        writer.writeheader()
        writer.writerows(task_rows)
        app.logger.debug('celery task is ready! returning actual result:\n%s', csv_io.getvalue())
        return Response(csv_io.getvalue(), mimetype='text/csv')
    else:
        return json_response(status=celery_task.status)
Example #13
def _stats_data_csv(user_profile, req_input, client, ignored, stats_type, is_custom):

    n_type_keys = {
        'mean': ['start', 'stop', 'service_name', 'mean', 'mean_all_services',
                  'usage_perc_all_services', 'time_perc_all_services', 'all_services_usage', 'mean_trend'],
        'usage': ['start', 'stop', 'service_name', 'usage', 'rate', 'usage_perc_all_services',
                  'time_perc_all_services', 'all_services_usage', 'usage_trend'],
        }

    buff = StringIO()
    writer = DictWriter(buff, n_type_keys[req_input.n_type], extrasaction='ignore')
    writer.writeheader()

    for stat in _get_stats(client, req_input.utc_start, req_input.utc_stop, req_input.n, req_input.n_type, stats_type):
        d = stat.to_dict()
        d['start'] = req_input.user_start
        d['stop'] = req_input.user_stop if stats_type == 'trends' or is_custom else ''
        writer.writerow(d)

    out = buff.getvalue()
    buff.close()

    response = HttpResponse(out, content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename={}'.format('zato-stats.csv')

    return response
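
extrasaction='ignore' is what lets the code above pass stat.to_dict() straight to writerow() even when the dict carries more keys than the selected columns; without it, DictWriter raises ValueError on unexpected fields. A small self-contained illustration:

from csv import DictWriter
from io import StringIO

buff = StringIO()
writer = DictWriter(buff, fieldnames=['start', 'stop', 'mean'], extrasaction='ignore')
writer.writeheader()

# 'mean_trend' is not in fieldnames; it is silently dropped instead of
# raising "dict contains fields not in fieldnames".
writer.writerow({'start': '10:00', 'stop': '11:00', 'mean': 0.25, 'mean_trend': 'up'})

print(buff.getvalue())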
Example #14
def run():
    output = open(sys.argv[1], 'w')
    writer = DictWriter(output, fieldnames=['uid', 'data'])
    writer.writeheader()
    db = DB(dbconfig)

    for uid in fetch_users(db):
        data = fetch_user_location_logs(uid, db)
        locations = merge_locations(data)
        matrix = generate_matrix(locations)
        semantic_data = fetch_semantic_data(list(matrix.keys()))
        semantic_dict = {}
        for row in semantic_data:
            semantic_dict[row['location']] = clean_tags(row['tags'], 5)
        tag_matrix = {}
        for location, proba in list(matrix.items()):
            tag_dict = semantic_dict[location]
            tag_weight = sum(v for v in list(tag_dict.values()))
            if tag_weight == 0:
                continue
            for tag, cnt in list(tag_dict.items()):
                tag_matrix.setdefault(tag, [0] * 48)
                for i in range(48):
                    tag_matrix[tag][i] += (proba[i] * cnt + 0.001) / (tag_weight + 0.001)
        writer.writerow({
            'uid': uid,
            'data': json.dumps(tag_matrix)
        })
    output.close()
Example #15
    def handle_noargs(self, **options):
        print "in the command..."

        comment_keys = ('user_key', 'g', 'agroup', 'user_key', 'experiment_slug', 'variant', 'via')

        petition_headers = comment_keys + ('name', 'email')
        # Petition signatories from the first two experiments
        for filename, url_path in [
            ('petition-1.csv', '/county-performance/petition'),
            ('petition-2.csv', '/county-performance-2/petition'),
        ]:
            with open(filename, "wb") as f:
                writer = DictWriter(f, petition_headers)
                writer.writeheader()
                for f in Feedback.objects.filter(url__endswith=url_path):
                    data, comment = unpack_comment(f.comment)
                    row_data = data.copy()
                    row_data['name'] = comment
                    row_data['email'] = f.email
                    writer.writerow(row_data)

        senate_headers = comment_keys + ('comment',)
        for filename, url_path in [
            ('senate-1.csv', '/county-performance/senate'),
            ('senate-2.csv', '/county-performance-2/senate'),
        ]:
            with open(filename, "wb") as f:
                writer = DictWriter(f, senate_headers)
                writer.writeheader()
                for f in Feedback.objects.filter(url__endswith=url_path):
                    data, comment = unpack_comment(f.comment)
                    row_data = data.copy()
                    row_data['comment'] = comment
                    writer.writerow(row_data)
Example #16
def process_vf(loc_data):
	precinct_data = {}
	with open(Files.VF_CUT.format(**loc_data), "r") as r, open(Files.VF_DEDUPED.format(**loc_data), "w") as w:
		reader = DictReader(r, dialect='excel-tab')
		writer = DictWriter(w, fieldnames=Headers.VF_DEDUPED)
		writer.writeheader()
		vf_hashes = set()
		p_count = 0
		for row in reader:
			if len(loc_data['county']) > 0 and not row['vf_county_name'].upper() == loc_data['county'].upper():
				continue
			vf_hash = get_hash(row, HashFields.VF)
			if vf_hash in vf_hashes:
				continue
			vf_hashes.add(vf_hash)
			vfp_hash = get_hash(row, HashFields.VFP)
			row_zip = row['vf_reg_cass_zip']
			if vfp_hash not in precinct_data:
				p_count += 1
				precinct_data[vfp_hash] = get_conversion(row, Conversions.VFP)
				precinct_data[vfp_hash]['vf_precinct_id'] = Prefixes.PRECINCT + str(p_count)
				precinct_data[vfp_hash]['zips'] = {row_zip:1}
				precinct_data[vfp_hash]['examples'] = []
			elif row_zip not in precinct_data[vfp_hash]['zips']:
				precinct_data[vfp_hash]['zips'][row_zip] = 1
			else:
				precinct_data[vfp_hash]['zips'][row_zip] += 1
			vf_output = get_conversion(row, Conversions.VF)
			if len(precinct_data[vfp_hash]['examples']) < 5:
				precinct_data[vfp_hash]['examples'].append(vf_output)
			vf_output["vf_precinct_id"] = precinct_data[vfp_hash]['vf_precinct_id'] 
			vf_output["vf_id"] = str(Prefixes.VF + row["voterbase_id"][3:])
			writer.writerow(vf_output)
	return precinct_data
Example #17
def get_vf_precincts(loc_data, precinct_data):
	with open(Files.VF_PRECINCTS.format(**loc_data), "w") as vfp_w, open(Files.VF_EX_PRECINCTS.format(**loc_data), "w") as vfep_w:
		vfp_writer = DictWriter(vfp_w, fieldnames=Headers.VFP)
		vfp_writer.writeheader()
		vfep_writer = DictWriter(vfep_w, fieldnames=Headers.VFEP)
		vfep_writer.writeheader()
		for key, vfp_dict in precinct_data.iteritems():
			zips = vfp_dict.pop('zips')
			max_count = 0
			max_zip = 0
			total_count = 0
			for zip_val, zip_count in zips.iteritems():
				total_count += zip_count
				if zip_count > max_count:
					max_count = zip_count
					max_zip = zip_val
			vfp_dict['vf_precinct_zip'] = max_zip
			vfp_dict['vf_precinct_count'] = total_count
			examples = vfp_dict.pop('examples')
			vfp_writer.writerow(vfp_dict)
			ex_count = 1
			for ex in examples:
				for key in Conversions.VF_EX:
					vfp_dict[Prefixes.VFP_EX.format(ex_count)+key] = ex[key]
				ex_count += 1
			vfep_writer.writerow(vfp_dict)
Example #18
def output_results(poi_result_set, screen=True, outfile=None):
    """
    Outputs unified DBSCAN results to screen or csv file.
    The screen only shows major data elements. The CSV file has the 
    complete dictionary (i.e., base dictionay plus ZOA attributes for each POI)
    """
    assert not isinstance(poi_result_set, basestring), 'POI result set is not list or tuple'

    if screen:
        print "\nZOAs by POI"
        print "="*80,
        for poi in poi_result_set:                
            print "\nLocation:\t%s" % poi[s.NAME_KEY]
            print "Address:\t%s" % poi[s.ADDR_KEY]
            print "Neighborhood:\t%s" % poi[s.NBHD_KEY]
            print "Coordinates:\t%.4f, %.4f" % (poi[s.LAT_KEY], poi[s.LNG_KEY])
            print "ZOA ID:\t\t%d" % poi[s.ZOA_KEY] 
        
    if outfile:
        assert isinstance(outfile, str), "Outfile name is not a string: %r" % outfile
        if outfile[-4:] != '.csv': outfile += '.csv'
        with open(outfile, 'wb') as f:
            target = DictWriter(f, poi_result_set[0].keys())
            target.writeheader()
            target.writerows(poi_result_set)
        print "\nWrote output to %s.\n" % outfile
Example #19
def plot_file(filename1):
    base_name = os.path.basename(filename1)
    name_parts = base_name.split('_')
    work_path = os.path.dirname(__file__)
    scores_filename = os.path.join(
        work_path,
        '_'.join(name_parts[:2] + ['v3loop_scores.csv']))
    if os.path.exists(scores_filename):
        with open(scores_filename) as f:
            reader = DictReader(f)
            score_rows = [list(map(int, row))
                          for row in map(itemgetter('score', 'count'), reader)]
    else:
        source1 = os.path.join('micall/tests/working/v3loop_alignment_scores/',
                               filename1)
        source2 = source1.replace('_R1_', '_R2_')
        start = datetime.now()
        with open(source1) as fastq1, open(source2) as fastq2:
            score_counts = align_reads(fastq1, fastq2)
        print('{}: {}'.format(datetime.now() - start, filename1))
        score_rows = sorted(score_counts.items())
        with open(scores_filename, 'w') as scores_csv:
            writer = DictWriter(scores_csv,
                                ('score', 'count'),
                                lineterminator=os.linesep)
            writer.writeheader()
            for score, count in score_rows:
                writer.writerow(dict(score=score, count=count))
    scores = [row[0] for row in score_rows]
    counts = [row[1] for row in score_rows]
    total_count = float(sum(counts))
    fractions = [count/total_count for count in counts]
    plt.plot(scores, fractions, label=base_name.split('_')[0], alpha=0.7)
Example #20
def main(infile, outfile):
    with open(infile) as inf, open(outfile, "w") as outf:
        r = DictReader(inf)
        rows = [process(row) for row in r]
        w = DictWriter(outf, fieldnames=rows[0].keys())
        w.writeheader()
        w.writerows(rows)
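
process() is defined elsewhere in that project. To make the round trip concrete, here is a runnable variant with a stand-in process() that just normalises one invented column; in.csv is assumed to exist and to contain a 'name' column:

from csv import DictReader, DictWriter

def process(row):
    # Stand-in transformation: strip whitespace and uppercase the 'name' column.
    row['name'] = row['name'].strip().upper()
    return row

with open('in.csv', newline='') as inf, open('out.csv', 'w', newline='') as outf:
    reader = DictReader(inf)
    rows = [process(row) for row in reader]
    writer = DictWriter(outf, fieldnames=rows[0].keys())
    writer.writeheader()
    writer.writerows(rows)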
Example #21
def run(args):

    opts = parse_args(args)
    opts.prefix = opts.prefix or opts.graph.split('.', 1)[0]

    sheets = {}
    sheet_headers = {}

    try:
        with file(opts.graph) as csvfile:
            for row in reader(csvfile):
                fv = dict(column.split('=', 1) for column in row)
                entity_type = fv.pop('Entity Type')
                headers = fv.keys()
                if entity_type not in sheets:
                    sheets[entity_type] = [fv]
                    sheet_headers[entity_type] = set(headers)
                    continue
                else:
                    sheets[entity_type].append(fv)
                if len(headers) > len(sheet_headers[entity_type]):
                    sheet_headers[entity_type].union(headers)

        for entity_type in sheets:
            with open('%s_%s.csv' % (opts.prefix, entity_type), 'wb') as csvfile:
                csv = DictWriter(csvfile, sheet_headers[entity_type])
                csv.writeheader()
                csv.writerows(sheets[entity_type])
    except IOError, e:
        print 'csv2sheets: %s' % e
        exit(-1)
Example #22
def run(out_file, input_dirs):
    assert input_dirs
    files = []
    for input_dir in input_dirs:
        for filename in listdir(input_dir):
            if filename.endswith(".log"):
                files.append(join(input_dir, filename))
    assert files

    raw_data = sorted((get_data(filename) for filename in files), key=data_key)

    aggregated_data = (
        indy_vars + aggregate_data(group) for (_key, indy_vars), group in
        groupby(raw_data, partial(data_key, include_id=False))
    )

    with open(out_file, "w") as f:
        out = DictWriter(f, independent_vars + dependent_vars)
        out.writeheader()
        out.writerows(raw_data)
    del out

    with open("-aggregate".join(splitext(out_file)), "w") as f:
        out = writer(f)
        out.writerow(independent_vars[:-1] + ("count",) + dependent_vars)
        out.writerows(aggregated_data)
Example #23
def main():
    # We open the 2000 file first because it has the headers
    print("Reading from:", SRC_PATHS['2000'])
    csv2000 = DictReader(SRC_PATHS['2000'].read_text().splitlines())
    # awkward but whatever. We need to use csv2000's headers
    # and add the 'year' column to it
    destfile = DEST_PATH.open('w')
    destcsv = DictWriter(destfile, fieldnames=['year'] + csv2000.fieldnames)
    destcsv.writeheader()
    for i, row in enumerate(csv2000):
        row['year'] = 2000
        destcsv.writerow(row)
    print("Wrote {0} lines to: {1}".format(i+1, DEST_PATH))

    # now we open 1990 file and iterate
    print("Reading from:", SRC_PATHS['1990'])
    for i, line in enumerate(SRC_PATHS['1990'].read_text().splitlines()):
        name, freq, cumfreq, rank = re.search(RX_ROW_1990, line).groups()
        row = { 'name': name.strip(),
                'rank': int(rank),
                'year': 1990,
                'prop100k': int(float(freq) * 1000),
                'cum_prop100k': int(float(cumfreq) * 1000),
              }
        destcsv.writerow(row)
    print("Wrote {0} lines to: {1}".format(i+1, DEST_PATH))
    # all done
    destfile.close()
Example #24
def main():
    search_par_h = open("data/search_params.csv", "w")
    writer = DictWriter(search_par_h, fieldnames=["SearchID", "SearchParams"])
    writer.writeheader()
    for t, row in read_tsv("data/SearchInfo.tsv"):
        sparams = row["SearchParams"]
        if not sparams:
            continue
        sid = int(row["SearchID"])
        sparams = re.sub(r"([A-Za-z0-9]+):", r'"\1":', sparams)
        sparams = sparams.replace("'", "\"")
        sparams = sparams.replace("Минивэн\",", "\"Минивэн\",")
        sparams = sparams.replace("Микроавтобус\"]", "\"Микроавтобус\"]")
        sparams = unicode(sparams, "utf-8")
        try:
            sparams = json.loads(sparams)
            for k, v in sparams.items():
                t = type(v)
                if t not in type_set:
                    print t, k, v
                    type_set.add(t)
            sparams_str = json.dumps(sparams)
            writer.writerow({"SearchID": sid, "SearchParams": sparams_str})
        except Exception as e:
            print e
            print sparams
Example #25
def customer_stats(outfile=None):
    sales = sales_grouped_by_users()

    stats = {}
    for user_id, items in sales:
        item_list = list(items)
        data = {}
        data['user_id'] = user_id
        data['n_lines'] = len(item_list)
        #all orders
        fill_items(data, item_list, suffix='')
        #online orders
        item_list_online = [i for i in item_list if i['online_order_number']]
        fill_items(data, item_list_online, suffix='_online')
        # sale items
        item_list_on_sale = [i for i in item_list if i['on_sale'] == 't']
        fill_items(data, item_list_on_sale, suffix='_on_sale')

        stats[user_id] = data

    if outfile is not None:
        fieldnames = sorted(data.keys())
        dw = DictWriter(open(outfile, 'w'), fieldnames=fieldnames)
        dw.writeheader()
        for user_id, row in stats.iteritems():
            dw.writerow(row)

    return stats.values()
Example #26
def write_csv(output_file, address_dicts):
    geocoded_file = open(output_file, 'wb')
    writer = DictWriter(geocoded_file, fieldnames=address_dicts[0].keys(), \
        dialect='excel', lineterminator='\n')
    writer.writeheader()
    writer.writerows(address_dicts)
    geocoded_file.close() 
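
Opening the file in 'wb' mode is a Python 2 idiom; under Python 3 the csv module expects a text-mode file opened with newline='' so the writer controls the line endings itself. A Python 3 rendering of the same helper, offered as a sketch:

from csv import DictWriter

def write_csv(output_file, address_dicts):
    with open(output_file, 'w', newline='') as geocoded_file:
        writer = DictWriter(geocoded_file, fieldnames=address_dicts[0].keys(),
                            dialect='excel', lineterminator='\n')
        writer.writeheader()
        writer.writerows(address_dicts)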
Example #27
def write_output():
    with open(
        "urls-used-for-local-transactions-with-statuses-and-jumbled-urls-and-pageviews.csv", "w", encoding="utf8"
    ) as output:
        writer = DictWriter(output, fields)
        writer.writeheader()
        writer.writerows(urls_used_with_pageviews)
Example #28
def write_data(outfile, data, fields):

    with open(outfile, 'w') as outfile:
        writer = DictWriter(outfile, fieldnames=fields)
        writer.writeheader()
        for d in data:
            writer.writerow(d)
Example #29
 def writePredictions(self):
     print "In writePredictions"
     o = DictWriter(open("predictions.csv", "w"), ["id", "position"])
     o.writeheader()
     for ii, pp in zip([x["id"] for x in self.test], self.predictions):
         d = {"id": ii, "position": pp}
         o.writerow(d)
Example #30
def main():

    parser = ArgumentParser()
    parser.add_argument('--csvfile', '-c', default=None, metavar='F',
                        help='csv file containing trybooking report')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='print verbose messages')
    args = parser.parse_args()

    csvinput = args.csvfile
    if csvinput is None:
        csvinput, _ = latest_report(
            None, rdir=develdir,
            nre=r'^(\d{8}).csv$',
            n2dt=lambda m: datetime.strptime(m.group(1), '%d%m%Y'),
            verbose=args.verbose
        )
        if csvinput is None:
            raise RuntimeError('no trybooking report found!')
        if args.verbose:
            print(
                '[trybooking report selected: {}]'.format(csvinput),
                file=sys.stderr
            )

    with open(csvinput, 'r', newline='') as infile:

        _ = infile.read(1)

        reader = DictReader(infile)

        orecs = []

        for inrec in reader:

            if inrec['Void'] == 'Yes':
                if args.verbose:
                    print(
                        'ignore VOID record: {}'.format(inrec),
                        file=sys.stderr
                    )
                continue

            name = inrec['Ticket Data: Player\'s First Name'] + ' ' + \
                inrec['Ticket Data: Player\'s Surname']
            date_of_birth = inrec['Ticket Data: Player\'s Date-of-Birth']
            paid = inrec['Net Booking']
            medical = inrec[
                'Ticket Data: Special Requirements/Medical Conditions'
            ]

            isparent = (
                inrec['Ticket Data: Purchaser is Player\'s Parent/Guardian']
                ==
                'Yes'
            )
            if isparent:
                parent = inrec['Booking First Name'] + ' ' + \
                    inrec['Booking Last Name']
                address = make_address(
                    inrec['Booking Address 1'],
                    inrec['Booking Address 2'],
                    inrec['Booking Suburb'],
                    inrec['Booking Post Code'],
                )
                phone = inrec['Booking Telephone']
                email = inrec['Booking Email']
            else:
                parent = inrec['Ticket Data: Parent/Guardian Name']
                address = inrec['Ticket Data: Parent/Guardian Address']
                phone = inrec['Ticket Data: Parent/Guardian Phone']
                email = inrec['Ticket Data: Parent/Guardian Email']

            orecs.append(
                dict(
                    paid=paid,
                    name=name,
                    date_of_birth=date_of_birth,
                    parent=parent,
                    email=email,
                    phone=make_phone(phone),
                    address=address.title(),
                    medical=medical,
                )
            )

    if len(orecs) == 0:
        print('No CSV records in "{}"'.format(csvinput))
        sys.exit(0)

    with TextIOWrapper(sys.stdout.buffer, newline='') as outfile:

        writer = DictWriter(outfile, fieldnames=orecs[0].keys())

        writer.writeheader()

        for outrec in orecs:

            writer.writerow(outrec)

    return 0
Example #31
def trim5p3p(records, output_prefix):
    f_FL = open(output_prefix + '.fl.fasta', 'w')
    f_FL_clips = open(output_prefix + '.fl.clips', 'w')
    f_nFL = open(output_prefix + '.nfl.fasta', 'w')
    f_csv = open(output_prefix + '.csv', 'w')
    writer = DictWriter(f_csv,
                        fieldnames=['id', 'end5', 'end3', 'endA', 'strand'])
    writer.writeheader()

    for r in records:
        r2 = r.reverse_complement()
        r2.id = r.id
        t1 = trim5p3p_helper(r)
        t2 = trim5p3p_helper(r2)

        is_fl_flag1 = t1.score5 >= MINSCORE_5P and t1.score3 >= MINSCORE_3P and (
            MIN_A_LEN == 0 or t1.endA != t1.end3)
        is_fl_flag2 = t2.score5 >= MINSCORE_5P and t2.score3 >= MINSCORE_3P and (
            MIN_A_LEN == 0 or t2.endA != t2.end3)

        if is_fl_flag1:
            if is_fl_flag2:
                if t1.score5 + t1.score3 > t2.score5 + t2.score3:
                    strand = '+'
                else:
                    strand = '-'
            else:  # pick t1
                strand = '+'
        elif is_fl_flag2:
            strand = '-'
        else:
            strand = 'NA'

        info = {
            'id': r.id,
            'end5': 'NA',
            'end3': 'NA',
            'endA': 'NA',
            'strand': 'NA'
        }

        if strand == '+':
            info['strand'] = '+'
            info['end5'] = t1.end5
            info['end3'] = t1.end3
            info['endA'] = t1.endA
            f_FL.write(">{0}\n{1}\n".format(r.id, r.seq[t1.end5:t1.endA]))
            f_FL_clips.write(">{0}_5p strand:+ score:{1}\n{2}\n".format(
                r.id, t1.score5, r.seq[:t1.end5]))
            f_FL_clips.write(">{0}_3p strand:+ score:{1}\n{2}\n".format(
                r.id, t1.score3, r.seq[t1.endA:]))
        elif strand == '-':
            info['strand'] = '-'
            info['end5'] = t2.end5
            info['end3'] = t2.end3
            info['endA'] = t2.endA
            f_FL.write(">{0}\n{1}\n".format(r2.id, r2.seq[t2.end5:t2.endA]))
            f_FL_clips.write(">{0}_5p strand:- score:{1}\n{2}\n".format(
                r.id, t2.score5, r2.seq[:t2.end5]))
            f_FL_clips.write(">{0}_3p strand:- score:{1}\n{2}\n".format(
                r.id, t2.score3, r2.seq[t2.endA:]))
        else:
            # non-FL, but we still want to trim away the 5' and 3' ends
            if t1.score5 + t1.score3 > t2.score5 + t2.score3:
                f_nFL.write(">{0} strand:+?\n{1}\n".format(
                    r.id, r.seq[t1.end5:t1.endA]))
            else:
                f_nFL.write(">{0} strand:-?\n{1}\n".format(
                    r2.id, r2.seq[t2.end5:t2.endA]))
        writer.writerow(info)
    f_csv.close()
    f_FL.close()
    f_FL_clips.close()
    f_nFL.close()
Example #32
def ensure_history_file_exists():
    """Make sure the file for the request history exists"""
    if not exists(HISTORY_PATH):
        with open(HISTORY_PATH, 'a+') as tfile:
            writer = DictWriter(tfile, CSV_FIELDS)
            writer.writeheader()
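
Once the header is in place, later runs can append records in 'a' mode with a writer built over the same CSV_FIELDS. A minimal sketch with placeholder field names and path (the real HISTORY_PATH and CSV_FIELDS live elsewhere in that project):

from csv import DictWriter
from os.path import exists

HISTORY_PATH = 'history.csv'           # placeholder path
CSV_FIELDS = ['timestamp', 'query']    # placeholder columns

def append_history_row(row):
    """Append one request record, writing the header only on first use."""
    new_file = not exists(HISTORY_PATH)
    with open(HISTORY_PATH, 'a', newline='') as tfile:
        writer = DictWriter(tfile, CSV_FIELDS)
        if new_file:
            writer.writeheader()
        writer.writerow(row)

append_history_row({'timestamp': '2024-01-01T00:00:00', 'query': 'weather'})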
Example #33
    with open(my_file) as csvfile:
        records = []
        waitlist = DictReader(csvfile)
        for row in waitlist:
            records.append(row)

    column_headers = records[0].keys()

    input = input('Enter the column header you would like to split: \n')

    if input not in column_headers:
        print("Input supplied not in column headings.... exiting.")
        sys.exit(1)

    for record in records:
        target = record[input]
        split_names = target.split(' ')
        del record[input]
        record['first %s' % input] = split_names[0]
        record['last %s' % input] = ''
        if len(split_names) > 1:
            record['last %s' % input] = ' '.join(split_names[1:])

    output_filename = 'outfiles/waitlist-%s.csv' % (''.join(
        choice(ascii_lowercase) for i in range(4)))

    with open(output_filename, 'w') as outfile:
        writer = DictWriter(outfile, records[0].keys())
        writer.writeheader()
        writer.writerows(records)
Example #34
def evaluate_alignment_sam(input_fa_or_fq,
                           sam_filename,
                           genome_d,
                           output_prefix,
                           junction_info=None):

    h1 = open(output_prefix + '.alignment_report.txt', 'w')
    h2 = open(output_prefix + '.junction_report.txt', 'w')

    w1 = DictWriter(h1, fieldnames=fieldnames_report1)
    w2 = DictWriter(h2, fieldnames=fieldnames_report2)
    w1.writeheader()
    w2.writeheader()

    #fieldnames_report1 = ['seqid', 'coverage', 'identity', 'num_sub', 'num_ins', 'num_del', 'num_exons']
    #fieldnames_report2 = ['seqid', 'donor_pos', 'donor_seq', 'donor_dist', 'acceptor_pos', 'acceptor_seq', 'acceptor_dist']

    query_len_dict = dict((r.id, len(r.seq)) for r in SeqIO.parse(
        open(input_fa_or_fq), type_fa_or_fq(input_fa_or_fq)))
    for r in GMAPSAMReader(sam_filename, True, query_len_dict=query_len_dict):
        if r.sID == '*':  # unaligned
            rec1 = {
                'seqid': r.qID,
                'coverage': 'NA',
                'identity': 'NA',
                'num_sub': 'NA',
                'num_ins': 'NA',
                'num_del': 'NA',
                'num_exons': 'NA'
            }
            w1.writerow(rec1)
            continue
        rec1 = {
            'seqid': r.qID,
            'coverage': r.qCoverage,
            'identity': r.identity,
            'num_sub': r.num_nonmatches - r.num_del - r.num_ins,
            'num_ins': r.num_ins,
            'num_del': r.num_del,
            'num_exons': len(r.segments)
        }
        w1.writerow(rec1)
        for i in range(0, len(r.segments) - 1):
            rec2 = {'seqid': r.qID}
            seq1, seq2 = get_donor_acceptor(genome_d, r.sID, r.flag.strand,
                                            r.segments[i].end - 1,
                                            r.segments[i + 1].start)
            if r.flag.strand == '+':
                rec2['donor_pos'] = "{0}:+:{1}".format(r.sID,
                                                       r.segments[i].end - 1)
                rec2['acceptor_pos'] = "{0}:+:{1}".format(
                    r.sID, r.segments[i + 1].start)
            else:
                rec2['donor_pos'] = "{0}:-:{1}".format(r.sID,
                                                       r.segments[i + 1].start)
                rec2['acceptor_pos'] = "{0}:-:{1}".format(
                    r.sID, r.segments[i].end - 1)
            rec2['donor_seq'] = seq1
            rec2['acceptor_seq'] = seq2
            if junction_info is not None:
                rec2['donor_dist'], rec2[
                    'acceptor_dist'] = get_closest_junction_dist(
                        junction_info, r.sID, r.flag.strand,
                        r.segments[i].end - 1, r.segments[i + 1].start)
            else:
                rec2['donor_dist'] = 'NA'
                rec2['acceptor_dist'] = 'NA'
            w2.writerow(rec2)
Example #35
def run_optimization(atoms: Atoms,
                     dihedrals: List[DihedralInfo],
                     n_steps: int,
                     calc: Calculator,
                     init_steps: int,
                     out_dir: Optional[Path],
                     relax: bool = True) -> Atoms:
    """Optimize the structure of a molecule by iteratively changing the dihedral angles

    Args:
        atoms: Atoms object with the initial geometry
        dihedrals: List of dihedral angles to modify
        n_steps: Number of optimization steps to perform
        init_steps: Number of initial guesses to evaluate
        calc: Calculator to pick the energy
        out_dir: Output path for logging information
        relax: Whether to relax non-dihedral degrees of freedom each step
    Returns:
        (Atoms) optimized geometry
    """
    # Perform an initial relaxation
    _, init_atoms = relax_structure(atoms, calc)
    if out_dir is not None:
        with open(out_dir.joinpath('relaxed.xyz'), 'w') as fp:
            simple_write_xyz(fp, [init_atoms])

    # Evaluate initial point
    start_coords = np.array([d.get_angle(init_atoms) for d in dihedrals])
    start_energy, start_atoms = evaluate_energy(start_coords, atoms, dihedrals,
                                                calc, relax)
    logger.info(f'Computed initial energy: {start_energy}')

    # Begin a structure log, if output available
    if out_dir is not None:
        log_path = out_dir.joinpath('structures.csv')
        ens_path = out_dir.joinpath('ensemble.xyz')
        with log_path.open('w') as fp:
            writer = DictWriter(fp, ['time', 'xyz', 'energy', 'ediff'])
            writer.writeheader()

        def add_entry(coords, atoms, energy):
            with log_path.open('a') as fp:
                writer = DictWriter(
                    fp, ['time', 'coords', 'xyz', 'energy', 'ediff'])
                xyz = StringIO()
                simple_write_xyz(xyz, [atoms])
                writer.writerow({
                    'time': datetime.now().timestamp(),
                    'coords': coords.tolist(),
                    'xyz': xyz.getvalue(),
                    'energy': energy,
                    'ediff': energy - start_energy
                })
            with ens_path.open('a') as fp:
                simple_write_xyz(fp, [atoms], comment=f'\t{energy}')

        add_entry(start_coords, start_atoms, start_energy)

    # Make some initial guesses
    init_guesses = np.random.normal(start_coords,
                                    30,
                                    size=(init_steps, len(dihedrals)))
    init_energies = []
    for i, guess in enumerate(init_guesses):
        energy, cur_atoms = evaluate_energy(guess, start_atoms, dihedrals,
                                            calc, relax)
        init_energies.append(energy - start_energy)
        logger.info(
            f'Evaluated initial guess {i+1}/{init_steps}. Energy-E0: {energy-start_energy}'
        )

        if out_dir is not None:
            add_entry(guess, cur_atoms, energy)

    # Save the initial guesses
    observed_coords = [start_coords, *init_guesses.tolist()]
    observed_energies = [0.] + init_energies

    # Loop over many steps
    cur_atoms = start_atoms.copy()
    for step in range(n_steps):
        # Make a new search space
        best_coords = select_next_points_botorch(observed_coords,
                                                 observed_energies)

        # Compute the energies of those points
        energy, cur_atoms = evaluate_energy(best_coords, cur_atoms, dihedrals,
                                            calc, relax)
        logger.info(
            f'Evaluated energy in step {step+1}/{n_steps}. Energy-E0: {energy-start_energy}'
        )
        if energy - start_energy < np.min(
                observed_energies) and out_dir is not None:
            with open(out_dir.joinpath('current_best.xyz'), 'w') as fp:
                simple_write_xyz(fp, [cur_atoms])

        # Update the log
        if out_dir is not None:
            add_entry(start_coords, cur_atoms, energy)

        # Update the search space
        observed_coords.append(best_coords)
        observed_energies.append(energy - start_energy)

    # Final relaxations
    best_atoms = cur_atoms.copy()
    best_coords = observed_coords[np.argmin(observed_energies)]
    best_energy, best_atoms = evaluate_energy(best_coords, best_atoms,
                                              dihedrals, calc)
    logger.info('Performed final relaxation with dihedral constraints.'
                f'E: {best_energy}. E-E0: {best_energy - start_energy}')
    if out_dir is not None:
        add_entry(np.array(best_coords), best_atoms, best_energy)

    # Relaxations
    best_atoms.set_constraint()
    best_energy, best_atoms = relax_structure(best_atoms, calc)
    logger.info('Performed final relaxation without dihedral constraints.'
                f' E: {best_energy}. E-E0: {best_energy - start_energy}')
    best_coords = np.array([d.get_angle(best_atoms) for d in dihedrals])
    if out_dir is not None:
        add_entry(best_coords, best_atoms, best_energy)
    return best_atoms
Example #36
from csv import DictReader, DictWriter


def cm_to_in(cm):
    return round(float(cm) * 0.393701, 2)


with open("Datasets/fighters.csv") as file:
    csv_reader = DictReader(file)
    fighters = list(csv_reader)

with open("Datasets/inches_fighters.csv", "w") as file:
    headers = ("Name", "Country", "Height")
    csv_writer = DictWriter(file, fieldnames=headers, lineterminator='\n')
    csv_writer.writeheader()
    for fighter in fighters:
        csv_writer.writerow({
            "Name": fighter["Name"],
            "Country": fighter["Country"],
            "Height": cm_to_in(fighter["Height (in cm)"])
        })
Example #37
            if cols == HEADERS:
                headers_found = True
        else:
            # headers have been found
            # don't capture anything unless
            # nearly all cells are filled...
            #            if not all(c == '' for c in cols) and len(HEADERS) - len(cols) == 0:
            if 'Employee' in cols[1] or '$' in cols[2]:
                yield dict(zip(HEADERS, cols))


if __name__ == '__main__':
    parser = ArgumentParser("Convert WH salary XLS page(s) to CSV")
    parser.add_argument('inpath',
                        type=str,
                        help="Path to a XLSX file, or directory of them")
    args = parser.parse_args()
    inpath = args.inpath
    if isdir(inpath):
        filenames = glob(joinpath(inpath, '*.xls?'))
    else:
        filenames = [inpath]

    # set up the CSV
    csvout = DictWriter(stdout, fieldnames=HEADERS)
    csvout.writeheader()

    for fname in filenames:
        for d in process_wh_salary_workbook(fname):
            csvout.writerow(d)
Example #38
                    )

        global_stats = Counter()
        global_stats_headers = set()

        for v in stats.values():
            global_stats_headers |= set(v.keys())

        fieldnames = ["notice_id"] + sorted(global_stats_headers)

        with open(os.path.join(outdir, "__detailed_stats.csv"),
                  "w") as f_detailed, open(
                      os.path.join(outdir, "__global_stats.json"),
                      "w") as f_global:
            w = DictWriter(f_detailed, fieldnames=fieldnames)
            w.writeheader()

            for k in natsorted(stats.keys()):
                global_stats.update(stats[k])
                row = {"notice_id": k}
                row.update(stats[k])
                w.writerow(row)

            json.dump(global_stats, f_global, indent=4, sort_keys=True)

        with open(os.path.join(outdir, "__detailed_stats.csv"),
                  "r") as f_in, open(
                      os.path.join(outdir, "__detailed_stats.txt"),
                      "w") as f_out:
            f_out.write(prettytable.from_csv(f_in).get_string())
Example #39
def gaVRPTW(pop, instName, unitCost, waitCost, delayCost, speed, indSize, popSize, cxPb, mutPb, NGen, exportCSV=False, customizeData=False):
    if customizeData:
        jsonDataDir = os.path.join('C:\Users\s.janischka\PycharmProjects\py-ga-VRPTW\data', 'json_customize')
    else:
        jsonDataDir = os.path.join('C:\Users\s.janischka\PycharmProjects\py-ga-VRPTW\data', 'json')
    jsonFile = os.path.join(jsonDataDir, '%s.json' % instName)
    with open(jsonFile) as f:
        instance = load(f)

    # Operator registering
    toolbox.register('evaluate', core.evalVRPTW, instance=instance, unitCost=unitCost, waitCost=waitCost, delayCost=delayCost, speed=speed)
    toolbox.register('select', tools.selRoulette)
    toolbox.register('mate', core.cxPartialyMatched)
    toolbox.register('mutate', core.mutInverseIndexes)
    pop=pop

    # Results holders for exporting results to CSV file
    csvData = []
    print 'Start of evolution'
    # Evaluate the entire population
    fitnesses = list(toolbox.map(toolbox.evaluate, pop))
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit
    # Debug, suppress print()
    # print '  Evaluated %d individuals' % len(pop)
    # Begin the evolution
    for g in range(NGen):
        print '-- Generation %d --' % g
        # Select the next generation individuals
        # Select elite - the best offspring; keep it past crossover/mutation
        elite = tools.selBest(pop, 1)
        # Select top 10% of all offspring
        # Roulette-select the remaining 90% of offspring
        offspring = tools.selBest(pop, int(numpy.ceil(len(pop)*0.1)))
        offspringRoulette = toolbox.select(pop, int(numpy.floor(len(pop)*0.9))-1)
        offspring.extend(offspringRoulette)
        # Clone the selected individuals
        offspring = list(toolbox.map(toolbox.clone, offspring))
        # Apply crossover and mutation on the offspring
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < cxPb:
                toolbox.mate(child1, child2)
                del child1.fitness.values
                del child2.fitness.values
        for mutant in offspring:
            if random.random() < mutPb:
                toolbox.mutate(mutant)
                del mutant.fitness.values
        # Evaluate the individuals with an invalid fitness
        invalidInd = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = toolbox.map(toolbox.evaluate, invalidInd)
        for ind, fit in zip(invalidInd, fitnesses):
            ind.fitness.values = fit
        # Debug, suppress print()
        # print '  Evaluated %d individuals' % len(invalidInd)
        # The population is entirely replaced by the offspring
        # Debug, printing offspring
        offspring.extend(elite)
        pop[:] = offspring
        
        # Gather all the fitnesses in one list and print the stats
        fits = [ind.fitness.values[0] for ind in pop]
        length = len(pop)
        mean = sum(fits) / length
        sum2 = sum(x*x for x in fits)
        std = abs(sum2 / length - mean**2)**0.5
        # Debug, suppress print()
        # print '  Min %s' % min(fits)
        # print '  Max %s' % max(fits)
        # print '  Avg %s' % mean
        # print '  Std %s' % std
        # Write data to holders for exporting results to CSV file
        if exportCSV:
            csvRow = {
                'generation': g,
                'evaluated_individuals': len(invalidInd),
                'min_fitness': min(fits),
                'max_fitness': max(fits),
                'avg_fitness': mean,
                'std_fitness': std,
                'avg_cost': 1 / mean,
            }
            csvData.append(csvRow)
    print '-- End of (successful) evolution --'
    bestInd = tools.selBest(pop, 1)[0]
    print 'Best individual: %s' % bestInd
    print 'Fitness: %s' % bestInd.fitness.values[0]
    core.printRoute(core.ind2route(bestInd, instance, speed))
    print 'Total cost: %s' % (1 / bestInd.fitness.values[0])
    if exportCSV:
        csvFilename = '%s_uC%s_wC%s_dC%s_iS%s_pS%s_cP%s_mP%s_nG%s.csv' % (instName, unitCost, waitCost, delayCost, indSize, popSize, cxPb, mutPb, NGen)
        csvPathname = os.path.join('results', csvFilename)
        print 'Write to file: %s' % csvPathname
        utils.makeDirsForFile(pathname=csvPathname)
        if not utils.exist(pathname=csvPathname, overwrite=True):
            with open(csvPathname, 'w') as f:
                fieldnames = ['generation', 'evaluated_individuals', 'min_fitness', 'max_fitness', 'avg_fitness', 'std_fitness', 'avg_cost']
                writer = DictWriter(f, fieldnames=fieldnames, dialect='excel')
                writer.writeheader()
                for csvRow in csvData:
                    writer.writerow(csvRow)
    return core.ind2route(bestInd, instance, speed)
Example #40
def demux_by_barcode(samples,
                     fps,
                     dir_out,
                     path_counts,
                     path_details,
                     dry_run=False):
    """Demultiplex one trio of files from dictionaries of sample and barcode data.

    samples: dictionary of sample attributes for this run, with BarcodeFwdSeq
             and BarcodeRevSeq defined
    fps: dict of "I1", "R1", and "R2" keys pointing to file paths to
         fastq.gz inputs
    dir_out: output directory to write demultiplexed fastq.gz files to
    path_counts: path to csv to write per-sample read counts to.  If empty this
                 file isn't written.
    path_details: csv.gz file to write detailed per-read information.  If
                  empty, this file isn't written.
    dry_run: If True, don't actually call any commands or write any files.
    """
    counts = defaultdict(int)
    # nested dictionary of sample name -> trios of I1/R1/R2 paths
    # NOTE
    # with too many samples at once, this will cause an OS error due to too
    # many open files. In that case we'd have to open/close as needed.  It's
    # easy here to just open a bunch and store handles in a dictionary, though.
    fp_outs = {
        s: {rdid: Path(dir_out) / f"{s}.{rdid}.fastq.gz"
            for rdid in READS}
        for s in samples
    }
    fp_outs["None"] = {
        rdid: Path(dir_out) / f"unassigned.{rdid}.fastq.gz"
        for rdid in READS
    }
    for samp in fp_outs:
        LOGGER.info("output I1 for %s: %s", samp, fp_outs[samp]["I1"])
    # lookup table between pairs of barcodes and sample names
    bc_map = {(v["BarcodeFwdSeq"], v["BarcodeRevSeq"]): k
              for k, v in samples.items()}
    if not dry_run:
        Path(dir_out).mkdir(parents=True, exist_ok=True)
        try:
            f_outs = {
                s: {rdid: GZIP_OPEN(fp_outs[s][rdid], "wt")
                    for rdid in READS}
                for s in fp_outs
            }
            details_writer = None
            if path_details:
                Path(path_details).parent.mkdir(parents=True, exist_ok=True)
                f_details = GZIP_OPEN(path_details, "wt")
                details_writer = DictWriter(f_details,
                                            fieldnames=[
                                                "SeqID", "BarcodeFwdSeq",
                                                "BarcodeRevSeq",
                                                "BarcodeFwdQualMin",
                                                "BarcodeRevQualMin"
                                            ],
                                            lineterminator="\n")
                details_writer.writeheader()
            with GZIP_OPEN(fps["I1"], "rt") as f_i1, \
                    GZIP_OPEN(fps["R1"], "rt") as f_r1, \
                    GZIP_OPEN(fps["R2"], "rt") as f_r2:
                for trio in zip(
                        # each of these is a tuple of (seqid, seq, qual) text
                        FastqGeneralIterator(f_i1),
                        FastqGeneralIterator(f_r1),
                        FastqGeneralIterator(f_r2)):
                    trio = list(trio)
                    trio.extend([
                        assign_barcode_fwd(trio[1][1], BARCODES_FWD),
                        assign_barcode_rev(trio[0][1], BARCODES_REV)
                    ])
                    _write_chunk([trio], bc_map, f_outs, counts,
                                 details_writer)
        finally:
            for trio in f_outs.values():
                for f_rd in trio.values():
                    f_rd.close()
            if path_details:
                f_details.close()

        if path_counts:
            _write_counts(path_counts, counts)
Example #41
 def get_citation_prov_csv(self):
     s_res = StringIO()
     writer = DictWriter(s_res, Citation.header_provenance_data)
     writer.writeheader()
     writer.writerow(loads(self.get_citation_prov_json()))
     return s_res.getvalue()
Example #42
    def test_urls(self):

        headers = "in_url class url resource_url resource_file target_file scheme proto resource_format target_format " \
                  "is_archive encoding target_segment".split()

        import tempfile
        tf = tempfile.NamedTemporaryFile(prefix="rowgen", delete=False)
        temp_name = tf.name
        tf.close()

        # S3 URLs have these fields, which need to be removed before writing to CSV files.
        def clean(do):

            for f in ['_orig_url', '_key', '_orig_kwargs', '_bucket_name']:
                try:
                    del do[f]
                except KeyError:
                    pass

        with open(data_path('url_classes.csv')) as f, open(temp_name,
                                                           'w') as f_out:
            w = None
            r = DictReader(f)
            errors = 0
            for i, d in enumerate(r):

                url = d['in_url']

                o = Url(url)

                do = dict(o.__dict__.items())
                del do['parts']

                if w is None:
                    w = DictWriter(f_out, fieldnames=headers)
                    w.writeheader()
                do['in_url'] = url
                do['is_archive'] = o.is_archive
                do['class'] = o.__class__.__name__
                clean(do)
                w.writerow(do)

                d = {k: v if v else None for k, v in d.items()}
                do = {k: str(v) if v else None
                      for k, v in do.items()}  # str() turns True into 'True'

                # a is the gauge data from url_classes.csv
                # b is the test object.

                try:  # A, B
                    self.compare_dict(url, d, do)
                except AssertionError as e:
                    errors += 1
                    print(e)
                    # raise

            self.assertEqual(0, errors)

        with open(data_path('url_classes.csv')) as f:

            r = DictReader(f)
            for i, d in enumerate(r):
                u1 = Url(d['in_url'])

        with open(data_path('url_classes.csv')) as f:

            r = DictReader(f)
            for i, d in enumerate(r):
                u1 = Url(d['in_url'])
                d1 = u1.__dict__.copy()
                d2 = deepcopy(u1).__dict__.copy()

                # The parts will be different Bunch objects
                clean(d1)
                clean(d2)
                del d1['parts']
                del d2['parts']

                self.assertEqual(d1, d2)

                self.assertEqual(d1, u1.dict)

        for us in ("http://example.com/foo.zip",
                   "http://example.com/foo.zip#a;b"):
            u = Url(us, encoding='utf-8')
            u2 = u.update(target_file='bingo.xls', target_segment='1')

            self.assertEqual('utf-8', u2.dict['encoding'])
            self.assertEqual('bingo.xls', u2.dict['target_file'])
            self.assertEqual('1', u2.dict['target_segment'])
Ejemplo n.º 43
def filter_by_count(input_prefix,
                    output_prefix,
                    min_count,
                    dun_use_group_count=False):

    group_filename = input_prefix + '.group.txt'
    count_filename = input_prefix + '.abundance.txt'
    gff_filename = input_prefix + '.gff'
    rep_filename = input_prefix + '.rep.fq'

    if not dun_use_group_count:
        # read group
        group_max_count_fl = {}
        group_max_count_p = {}
        f = open(group_filename)
        for line in f:
            #ex: PB.1.1  i0HQ_54b0ca|c58773/f30p16/700
            pbid, members = line.strip().split('\t')
            group_max_count_fl[pbid] = 0
            group_max_count_p[pbid] = 0
            members = members.split(',')
            for m in members:
                i = m.find('|')
                if i > 0:
                    tmp = m.split('|')[1].split('/')[1]  #ex: tmp = f30p16
                else:
                    tmp = m.split('/')[1]
                fl_count, p_count = tmp.split('p')
                fl_count = int(fl_count[1:])
                p_count = int(p_count)
                group_max_count_fl[pbid] = max(group_max_count_fl[pbid],
                                               fl_count)
                group_max_count_p[pbid] = max(group_max_count_p[pbid], p_count)
        f.close()

    # read abundance first
    f = open(count_filename)
    count_header = ''
    while True:
        cur_pos = f.tell()
        line = f.readline()
        if not line.startswith('#'):
            f.seek(cur_pos)
            break
        else:
            count_header += line
    d = dict((r['pbid'], r) for r in DictReader(f, delimiter='\t'))
    for k, v in d.iteritems():
        print k, v
    f.close()

    # group_max_count_p NOT used for now
    good = filter(
        lambda x: int(d[x]['count_fl']) >= min_count and
        (dun_use_group_count or group_max_count_fl[x] >= min_count), d)

    # write output GFF
    f = open(output_prefix + '.gff', 'w')
    for r in GFF.collapseGFFReader(gff_filename):
        if r.seqid in good: GFF.write_collapseGFF_format(f, r)
    f.close()

    # write output rep.fq
    f = open(output_prefix + '.rep.fq', 'w')
    for r in SeqIO.parse(open(rep_filename), 'fastq'):
        if r.name.split('|')[0] in good:
            SeqIO.write(r, f, 'fastq')
    f.close()

    # write output to .abundance.txt
    f = open(output_prefix + '.abundance.txt', 'w')
    f.write(count_header)
    writer = DictWriter(f, fieldnames=['pbid','count_fl','count_nfl','count_nfl_amb','norm_fl','norm_nfl','norm_nfl_amb'], \
                        delimiter='\t', lineterminator='\n')
    writer.writeheader()
    for k in good:
        r = d[k]
        writer.writerow(r)
    f.close()

    print >> sys.stderr, "Output written to:", output_prefix + '.gff'
    print >> sys.stderr, "Output written to:", output_prefix + '.rep.fq'
    print >> sys.stderr, "Output written to:", output_prefix + '.abundance.txt'
Ejemplo n.º 44
    ANAND_index = 0

    right = 0
    total = len(dev_test)
    for ii in dev_test:
        prediction = classifier.classify(ii[0])
        if prediction == ii[1]:
            right += 1
        else:
            ANAND_writer.writerow(AP_TRAIN_LIST[ANAND_index] + [prediction])
        ANAND_index += 1
    sys.stderr.write("Accuracy on dev: %f\n" % (float(right) / float(total)))

    if testfile is None:
        sys.stderr.write("No test file passed; stopping.\n")
    else:
        # Retrain on all data
        classifier = nltk.classify.NaiveBayesClassifier.train(dev_train +
                                                              dev_test)

        # Read in test section
        test = {}
        for ii in DictReader(testfile, delimiter='\t'):
            test[ii['id']] = classifier.classify(fe.features(ii['text']))

        # Write predictions
        o = DictWriter(outfile, ['id', 'pred'])
        o.writeheader()
        for ii in sorted(test):
            o.writerow({'id': ii, 'pred': test[ii]})
Ejemplo n.º 45
def submit(req):
	#clear the list
	lista_all.clear()

	#object for managing image files
	fs = FileSystemStorage()

	"""
	Check whether the 'media' folder exists; if it does, it is deleted (since it will
	contain images from previous submissions). If it has already been deleted, it is
	then recreated once more.
	"""
	if(fs.exists(fs.location)):
		shutil.rmtree(fs.location)
	else:
		os.mkdir(fs.location)
	
	"""
	Irá verificar se foi passado na requisição um arquivo de imagem e se o method HTTP
	passado é do tipo POST
	"""
	if req.method == 'POST' and req.FILES['myFile']:
		myfile = req.FILES['myFile']#Põe a imagem em um objeto.

		"""
		O arquivo é salvo no método abaixo que retorna seu caminho para a variável filename.
		"""
		filename = fs.save(myfile.name, myfile)#retorna o nome do arquivo.extensão.

		uploaded_file_url = fs.url(filename)#retorna o caminho completo do nome de arquivo passado.

		module_dir = os.path.dirname(__file__)#pega o diretório atual do arquivo.
		file_path = os.path.join(module_dir, "yolofiles/yoloDados/")

		#objeto para gerenciar arquivos de imagem
		fs = FileSystemStorage()
		img_path = os.path.join(fs.location, filename)

		image = cv2.imread(img_path)
		
		#capture variables
		h, w = None, None

		#load the file with the names of the objects the model was trained to identify
		with open(f"{file_path}YoloNames.names") as f:
			#build a list with all the names
			labels = [line.strip() for line in f]

		#load the files trained by the framework
		network = cv2.dnn.readNetFromDarknet(f"{file_path}yolov3.cfg", f"{file_path}yolov3.weights")

		#get a list with the names of all the objects the framework was trained on
		layers_names_all = network.getLayerNames()

		#get only the names of the output layers that we need for the YOLOv3 algorithm
		#using the function that returns the indices of layers with unconnected outputs

		layers_names_output = \
			[layers_names_all[i[0] - 1] for i in network.getUnconnectedOutLayers()]

		# Minimum probability used to discard weak predictions
		probability_minimum = 0.5

		#Threshold used to filter out weak bounding boxes
		#with non-maximum suppression
		threshold = 0.3

		#Generate random colours for the boxes of each detected object.
		colours = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8")

		#capture and object detection loop
		with open(f"{module_dir}/results.csv", "w") as arquivo:#create/read the file that will store the test results
			cabecalho = ["Objeto", "Porcentagem"]
			escritor_csv = DictWriter(arquivo, fieldnames=cabecalho)
			escritor_csv.writeheader()

			while True:
				if w is None or h is None:
					#slice only the first two elements of the tuple
					h, w = image.shape[:2]

				#The resulting shape has the number of frames, number of channels, width and height
				#E.G.:
				blob = cv2.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=True, crop=False)

				#Run the forward pass with our blob through the output layers only,
				#measuring at the same time how long the forward pass takes
				network.setInput(blob) #set the blob as the network input
				output_from_network = network.forward(layers_names_output)

				#prepare lists for the detected bounding boxes
				bounding_boxes = []
				confidences = []
				class_numbers = []

				#loop over all output layers after the forward pass
				#object detection phase
				for result in output_from_network:
					for detected_objects in result:
						scores = detected_objects[5:]
						class_current = np.argmax(scores)

						confidence_current = scores[class_current]

						#discard weak predictions below the minimum probability
						if confidence_current > probability_minimum:
							box_current = detected_objects[0:4] * np.array([w, h, w, h])
							x_center, y_center, box_width, box_height = box_current
							x_min = int(x_center - (box_width / 2))
							y_min = int(y_center - (box_height / 2))

							#Add the results to the prepared lists
							bounding_boxes.append([x_min, y_min, int(box_width), int(box_height)])
							confidences.append(float(confidence_current))
							class_numbers.append(class_current)
							
				results = cv2.dnn.NMSBoxes(bounding_boxes, confidences, probability_minimum, threshold)

				#check whether at least one object was detected
				if len(results) > 0:
					for i in results.flatten():
						x_min, y_min = bounding_boxes[i][0], bounding_boxes[i][1]
						box_width, box_height = bounding_boxes[i][2], bounding_boxes[i][3]
						colours_box_current = colours[class_numbers[i]].tolist()
						image_new = cv2.rectangle(image, (x_min, y_min), (x_min + box_width, y_min + box_height), colours_box_current, 2)

						#format the percentage to 2 decimal places.
						percent = str(confidences[i])
						percent_formatted = int(percent[2:6])
						percent_formatted = str(percent_formatted/100)+"%"

						#Prepare the text with the label and the confidence for the detected object.
						text_box_current = "{}: {}".format(labels[int(class_numbers[i])], percent_formatted)

						# Put the text on the detected objects
						cv2.putText(image, text_box_current, (x_min, y_min - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colours_box_current, 2)

						escritor_csv.writerow( {"Objeto": text_box_current.split(":")[0], "Porcentagem": text_box_current.split(":")[1]})
						
						lista_all.append(text_box_current.split(":")[0].capitalize() +" - "+ text_box_current.split(":")[1] +" de chance de ser este objeto.")

					contexto["img_path_new"] = "../../media/new"+filename

					#join the path to save the image with the rectangles and labels.
					img_path_new = os.path.join(fs.location, "new"+filename)
					cv2.imwrite(f"{img_path_new}", image_new)#save the new image.

					text_voice = "Esses foram alguns objetos identificados na imagem submetida: "

					for linha in lista_all:
						linha = linha.split(" - ")
						text_voice += f" {linha[0]},"

					text_voice += ", Todos objetos encontrados tem uma chance de estarem corretos entre 51 a 99%."
					text_voice += ", Para parar a fala pressione a tecla espaço."

					TTS = gTTS(text=text_voice, lang='pt-br')

					# Save to mp3 in current dir.
					TTS.save(os.path.join(fs.location, "audio.mp3"))
					audio_path = os.path.join(fs.location, "audio.mp3")
					contexto["audio_path"] = "../../media/audio.mp3"

					return redirect("../result")
				elif len(results) <= 0:
					contexto["img_path_new"] = "../../media/"+filename
					text_voice = "Não foram identificados objetos nesta imagem, para parar a fala pressione a tecla espaço."

					TTS = gTTS(text=text_voice, lang='pt-br')

					# Save to mp3 in current dir.
					TTS.save(os.path.join(fs.location, "audio.mp3"))
					audio_path = os.path.join(fs.location, "audio.mp3")
					contexto["audio_path"] = "../../media/audio.mp3"

					return redirect("../result")
	return render(req, 'submitimg.html')#serve the requested page
Ejemplo n.º 46
def convert_spacetx_json_to_csv(spacetx_json: str, csv: IOBase):
    spacetx_json_path = Path(spacetx_json).absolute()
    _, name, baseurl = slicedimage.io.resolve_path_or_url(spacetx_json)
    data = slicedimage.io.Reader.parse_doc(name, baseurl)
    assert isinstance(data, slicedimage.Collection)

    csvwriter = DictWriter(csv, [
        "fov",
        "round",
        "ch",
        "zplane",
        "xc_min",
        "xc_max",
        "yc_min",
        "yc_max",
        "zc_min",
        "zc_max",
        "path",
        "sha256",
    ])
    csvwriter.writeheader()

    seen_fov_nums: MutableMapping[int, str] = dict()
    for name, tileset in data.all_tilesets():
        fov_num = int("".join(
            [character for character in name if character.isdigit()]))

        if fov_num in seen_fov_nums:
            raise ValueError(
                f"both {name} and {seen_fov_nums[fov_num]} resolve to the same fov number"
            )
        seen_fov_nums[fov_num] = name

        for tile in tileset.tiles():
            row = {
                'fov': str(fov_num),
                'round': str(tile.indices[Axes.ROUND]),
                'ch': str(tile.indices[Axes.CH]),
                'zplane': str(tile.indices[Axes.ZPLANE]),
                'xc_min': str(tile.coordinates[Coordinates.X][0]),
                'xc_max': str(tile.coordinates[Coordinates.X][1]),
                'yc_min': str(tile.coordinates[Coordinates.Y][0]),
                'yc_max': str(tile.coordinates[Coordinates.Y][1]),
                'zc_min': str(tile.coordinates[Coordinates.Z][0]),
                'zc_max': str(tile.coordinates[Coordinates.Z][1]),
                'sha256': tile.sha256,
            }

            # getting the path is a brittle operation
            for closure_contents in tile._numpy_array_future.__closure__:
                cell_contents = closure_contents.cell_contents

                if isinstance(cell_contents, _FileLikeContextManager):
                    path = Path(cell_contents.path).relative_to(
                        spacetx_json_path.parent)
                    break
            else:
                raise ValueError(f"Could not find the path")
            row['path'] = path

            csvwriter.writerow(row)
Ejemplo n.º 47
def _write_out_file(file_name: str, headers: List[str], content: List[dict]):
    with open(os.path.join(DATA_DIR, file_name), 'w') as out_file:
        writer = DictWriter(out_file, fieldnames=headers)
        writer.writeheader()
        writer.writerows(content)
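# A hedged usage sketch for the helper above: DATA_DIR is assumed to be a module-level
# constant pointing at an existing directory, and the headers must cover every key in
# the row dicts (unexpected keys would make DictWriter raise a ValueError).
if __name__ == "__main__":
    _write_out_file(
        "people.csv",
        headers=["name", "age"],
        content=[{"name": "Ada", "age": 36}, {"name": "Alan", "age": 41}],
    )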
Ejemplo n.º 48
def run_test_cases(test_cases_filename, running_real_tests):
    #== Section 1: Setup (Files & Data) ==#
    # First determine where the renders live
    parts = [test_cases_filename, 'refernence', 'renders']
    references_folder = '_'.join(parts)

    if running_real_tests:
        parts = [test_cases_filename, 'renders']

    # build some file paths
    renders_destination = '_'.join(parts)
    results_csv_filename = path.join(renders_destination, 'results.csv')
    results_txt_filename = path.join(renders_destination, 'results.txt')
    cmake_cache_src = path.join('build', 'CMakeCache.txt')
    cmake_cache_dst = path.join(renders_destination, 'CMakeCache.txt')

    # Read in the test configurations
    test_cases = []
    with open(test_cases_filename, 'r') as csv_file:
        reader = DictReader(csv_file)
        test_cases = [row for row in reader]

    # Find the samesies for when we do the `idiff`
    matching_renders = find_matching_renders(test_cases)
    have_matching = (len(matching_renders) > 0)
    if have_matching:
        matching_msg = 'Verifying cases where renders should be the same:'
    else:
        matching_msg = 'WARNING: Not able to find any test cases that should produce the same result'

    # Save renders to a folder that's close to our filename
    Path(renders_destination).mkdir(exist_ok=True, parents=True)

    # Copy over the CMake build configuration; this way you know how the software was built for that test run
    copyfile(cmake_cache_src, cmake_cache_dst)

    # Create the results CSV file
    with open(results_csv_filename, 'w') as csv_file:
        fields = list(FIELDS)
        fields.append(
            'render_time_ns')  # Add on the render time as another column

        # If we're running the real tests, we also need to list PASS/FAIL status
        if running_real_tests:
            fields.append('matches_reference')

        writer = DictWriter(csv_file, fieldnames=fields)
        writer.writeheader()

    #== Section 2: Running Tests ==#
    # Run the renders!
    total_render_time_ns = 0
    num_matches_reference = 0
    num_total_cases = len(test_cases)

    print('Running %i test cases:' % num_total_cases)
    for case in test_cases:
        #== Section 2a: Run the Render ==#
        # Build arguments to run with
        id_num = case['id_num']
        render_filename = '%s.png' % id_num
        render_filepath = path.join(renders_destination, render_filename)

        args = [
            '--testing-mode',
            '-o',
            render_filepath,
            '--scene',
            case['scene_id'],
            '--random-seed',
            case['random_seed'],
            '--num-threads',
            case['num_threads'],
            '--depth',
            case['ray_depth'],
            '--num-samples',
            case['num_samples'],
            '--size',
            case['size'],
        ]

        # Do the render
        print('  Test %s/%s:' % (id_num, num_total_cases), end='', flush=True)
        output = check_output([PS_RAYTRACING_EXE,
                               *args]).decode('utf-8').strip()
        parts = output.split(' ')

        # Verify things were outputted correctly, if not, the quit testing
        if (len(parts) != 2) or (parts[1] != 'ns'):
            print('Error in the text output from test %s: %s' %
                  (id_num, output))
            print("It's not as expected, quiting the test suite")
            exit(1)

        #== Section 2b: Check Render Results ==#
        # Test against the reference (maybe?)
        pass_fail_str = None
        if running_real_tests:
            ref_render_filepath = path.join(references_folder,
                                            '%s.png' % id_num)
            result = test_images_match(ref_render_filepath, render_filepath)

            if result:
                num_matches_reference += 1
                pass_fail_str = 'PASS'
            else:
                pass_fail_str = 'FAIL'

            # print the result of the pass/fail
            print(' %s' % pass_fail_str, end='', flush=True)

        #== Section 2c: Report/Save Metrics ==#
        # Get the time
        render_time_ns = int(parts[0])
        total_render_time_ns += render_time_ns
        render_time_s = render_time_ns / 1000000000.0
        print(' [%.3f s]' % render_time_s, end='', flush=True)

        # Newline
        print('')

        # Write results to CSV
        with open(results_csv_filename, 'a') as csv_file:
            # Add on the "render time (ns)" column
            case['render_time_ns'] = render_time_ns

            # And maybe the pass/fail
            if pass_fail_str:
                case['matches_reference'] = pass_fail_str

            DictWriter(csv_file, fieldnames=case.keys()).writerow(case)

    #== Section 3: Verification of Matching Renders ==#
    # Verify renders that should be the same
    print('')
    print(matching_msg)

    matching_renders_results_txt = ''
    for pair in matching_renders:
        # Build the arguments for the `idiff` command
        render_a = path.join(renders_destination, '%s.png' % pair[0])
        render_b = path.join(renders_destination, '%s.png' % pair[1])
        result = test_images_match(render_a, render_b)
        result_str = 'PASS' if result else 'FAIL'

        # Format the message to print (and save to report)
        case = '  %s -- %s : %s' % (render_a, render_b, result_str)
        print(case)
        matching_renders_results_txt += '%s\n' % case

    #== Section 4: Metrics Info  ==#
    # Metrics
    total_time_str = 'Total render time was %.3f s' % (total_render_time_ns /
                                                       1000000000.0)
    print('')
    print(total_time_str)

    # Put some of those metrics in a file
    with open(results_txt_filename, 'w') as results_txt:
        if running_real_tests:
            results_txt.write('%s/%s tests passed\n' %
                              (num_matches_reference, num_total_cases))

        results_txt.write('%s (or %i ns)\n' %
                          (total_time_str, total_render_time_ns))
        results_txt.write('%s\n' % matching_msg)

        if have_matching:
            results_txt.write(matching_renders_results_txt)
Ejemplo n.º 49
    with open("results/variance.csv", "w") as f:
        header = [
            "name", "octtime mean", "octtime var", "dfstime mean",
            "dfstime var", "stdtime mean", "stdtime var", "biptime mean",
            "biptime var", "naivetime mean", "naivetime var", "apxtime mean",
            "apxtime var", "greedytime mean", "greedytime var",
            "octfirsttime mean", "octfirsttime var", "bipfirsttime mean",
            "bipfirsttime var", "rectime mean", "rectime var",
            "recocttime mean", "recocttime var", "recbiptime mean",
            "recbiptime var", "dfssize mean", "dfssize var", "stdsize mean",
            "stdsize var", "octsize mean", "octsize var", "n", "bfssize mean",
            "bfstime mean", "bfstime var"
        ]
        results = DictWriter(f, header)
        results.writeheader()

        for filename in os.listdir(filepath):
            if not filename.endswith(".s6"):
                continue

            res = {}

            graphname = filename.split(".s6")[0]
            print(graphname)
            res["name"] = graphname

            graph = read_sparse6("{}{}".format(filepath, filename))
            print("n: {}".format(len(graph)))
            res["n"] = len(graph)
Ejemplo n.º 50
print(range_instance.generator())
range_instance.range_list()

# %% CSV

from csv import DictWriter, DictReader

from pathlib import Path

csv_file_path = Path("./test.csv")

test_rows = [{"a": 1, "b": 2}, {"a": 40, "b": 34}]

with open("test.csv", "w") as test_file:
    test_writer = DictWriter(test_file, fieldnames=["a", "b"])
    test_writer.writeheader()
    for row in test_rows:
        test_writer.writerow(row)
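
# DictReader is imported above but not used; as a quick check (assuming the cell above
# has already run and test.csv exists), the rows can be read back as dicts:
with open("test.csv", newline="") as test_file:
    for row in DictReader(test_file):
        print(row)  # e.g. {'a': '1', 'b': '2'} -- values come back as strings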

# %%


def test():
    row = list()

    def test_two():
        row = None
        row.append(1)


test_rows = []
Ejemplo n.º 51
def main():
    single_counts = Counter()
    double_counts = Counter()

    all_labels = []

    with open('processed/genres.json', 'r') as genres_file:
        genres = load(genres_file)

    id_to_genre = {}
    for genre, gid in genres.items():
        id_to_genre[gid] = genre

    with open('processed/labels.txt', 'r') as labels_file:
        for line in labels_file:
            all_labels.append(
                list(map(id_to_genre.get,
                         line.strip().split(','))))

    for labels in all_labels:
        single_counts.update(labels)
        double_counts.update(map(tuple, map(sorted, combinations(labels, 2))))

    print(single_counts)
    print()
    print(double_counts)

    workbook = Workbook('distribution.xlsx')
    worksheet = workbook.add_worksheet()
    bold = workbook.add_format({
        'bold': True,
        'align': 'center',
        'valign': 'vcenter'
    })
    bold_header = workbook.add_format({
        'bold': True,
        'align': 'center',
        'valign': 'vcenter',
        'bg_color': '#a0a0a0'
    })
    bold_header_sideways = workbook.add_format({
        'bold': True,
        'rotation': 90,
        'align': 'center',
        'valign': 'vcenter',
        'bg_color': '#a0a0a0'
    })

    row = 0
    for genre2 in sorted(single_counts.keys()):
        worksheet.write(row, 0, genre2, bold_header)
        col = 1
        for genre1 in filter(lambda g1: g1 < genre2,
                             sorted(single_counts.keys())):
            co_occurrances = double_counts[(genre1, genre2)]
            ratio = (256 * co_occurrances) // (
                single_counts[genre1] + single_counts[genre2] - co_occurrances)
            f = workbook.add_format({
                'bg_color':
                "#FF{0:02x}{0:02x}".format(255 - (ratio * 3)),
                'align':
                'center',
                'valign':
                'vcenter'
            })
            worksheet.write(row, col, co_occurrances, f)
            col += 1

        worksheet.write(row, row, single_counts[genre2], bold)

        row += 1

    for col, genre1 in enumerate([''] + sorted(single_counts.keys())):
        worksheet.write(len(single_counts.keys()), col, genre1,
                        bold_header_sideways)

    workbook.close()

    with open('distribution.csv', 'w') as dist_file:
        writer = DictWriter(dist_file,
                            fieldnames=['genre'] +
                            sorted(single_counts.keys()))
        writer.writeheader()

        for genre2 in sorted(single_counts.keys()):
            row = {'genre': genre2, genre2: single_counts[genre2]}
            for genre1 in filter(lambda g1: g1 < genre2, single_counts.keys()):
                row[genre1] = double_counts[(genre1, genre2)]

            writer.writerow(row)
Ejemplo n.º 52
def run_gavrptw(instance_name, unit_cost, init_cost, wait_cost, delay_cost, ind_size, pop_size, \
            cx_pb, mut_pb, n_gen, export_csv=False, customize_data=False):
    '''gavrptw.core.run_gavrptw(instance_name, unit_cost, init_cost, wait_cost, delay_cost,
                ind_size, pop_size, cx_pb, mut_pb, n_gen, export_csv=False, customize_data=False)'''
    if customize_data:
        json_data_dir = os.path.join(BASE_DIR, 'data', 'json_customize')
    else:
        json_data_dir = os.path.join(BASE_DIR, 'data', 'json')
    json_file = os.path.join(json_data_dir, '{}.json'.format(instance_name))
    instance = load_instance(json_file=json_file)
    if instance is None:
        return
    creator.create('FitnessMax', base.Fitness, weights=(1.0, ))
    creator.create('Individual', list, fitness=creator.FitnessMax)
    toolbox = base.Toolbox()
    # Attribute generator
    toolbox.register('indexes', random.sample, range(1, ind_size + 1),
                     ind_size)
    # Structure initializers
    toolbox.register('individual', tools.initIterate, creator.Individual,
                     toolbox.indexes)
    toolbox.register('population', tools.initRepeat, list, toolbox.individual)
    # Operator registering
    toolbox.register('evaluate',
                     eval_vrptw,
                     instance=instance,
                     unit_cost=unit_cost,
                     init_cost=init_cost,
                     wait_cost=wait_cost,
                     delay_cost=delay_cost)
    toolbox.register('select', tools.selRoulette)
    toolbox.register('mate', cx_partialy_matched)
    toolbox.register('mutate', mut_inverse_indexes)
    pop = toolbox.population(n=pop_size)
    # Results holders for exporting results to CSV file
    csv_data = []
    print('Start of evolution')
    # Evaluate the entire population
    fitnesses = list(map(toolbox.evaluate, pop))
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit
    print('  Evaluated {} individuals'.format(len(pop)))
    # Begin the evolution
    for gen in range(n_gen):
        print('-- Generation {} --'.format(gen))
        # Select the next generation individuals
        offspring = toolbox.select(pop, len(pop))
        # Clone the selected individuals
        offspring = list(map(toolbox.clone, offspring))
        # Apply crossover and mutation on the offspring
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < cx_pb:
                toolbox.mate(child1, child2)
                del child1.fitness.values
                del child2.fitness.values
        for mutant in offspring:
            if random.random() < mut_pb:
                toolbox.mutate(mutant)
                del mutant.fitness.values
        # Evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = map(toolbox.evaluate, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit
        print('  Evaluated {} individuals'.format(len(invalid_ind)))
        # The population is entirely replaced by the offspring
        pop[:] = offspring
        # Gather all the fitnesses in one list and print the stats
        fits = [ind.fitness.values[0] for ind in pop]
        length = len(pop)
        mean = sum(fits) / length
        sum2 = sum(x * x for x in fits)
        std = abs(sum2 / length - mean**2)**0.5
        print('  Min {}'.format(min(fits)))
        print('  Max {}'.format(max(fits)))
        print('  Avg {}'.format(mean))
        print('  Std {}'.format(std))
        # Write data to holders for exporting results to CSV file
        if export_csv:
            csv_row = {
                'generation': gen,
                'evaluated_individuals': len(invalid_ind),
                'min_fitness': min(fits),
                'max_fitness': max(fits),
                'avg_fitness': mean,
                'std_fitness': std,
            }
            csv_data.append(csv_row)
    print('-- End of (successful) evolution --')
    best_ind = tools.selBest(pop, 1)[0]
    print('Best individual: {}'.format(best_ind))
    print('Fitness: {}'.format(best_ind.fitness.values[0]))
    print_route(ind2route(best_ind, instance))
    print('Total cost: {}'.format(1 / best_ind.fitness.values[0]))
    if export_csv:
        csv_file_name = '{}_uC{}_iC{}_wC{}_dC{}_iS{}_pS{}_cP{}_mP{}_nG{}.csv'.format(
            instance_name, unit_cost, init_cost, wait_cost, delay_cost,
            ind_size, pop_size, cx_pb, mut_pb, n_gen)
        csv_file = os.path.join(BASE_DIR, 'results', csv_file_name)
        print('Write to file: {}'.format(csv_file))
        make_dirs_for_file(path=csv_file)
        if not exist(path=csv_file, overwrite=True):
            with io.open(csv_file, 'wt', newline='') as file_object:
                fieldnames = [
                    'generation',
                    'evaluated_individuals',
                    'min_fitness',
                    'max_fitness',
                    'avg_fitness',
                    'std_fitness',
                ]
                writer = DictWriter(file_object,
                                    fieldnames=fieldnames,
                                    dialect='excel')
                writer.writeheader()
                for csv_row in csv_data:
                    writer.writerow(csv_row)
Ejemplo n.º 53
def register_user(username, password, userids):
    """Takes input, bcrypts it, and writes it to a file.

    Keyword arguments:
    username - The unique identifier for the user.
    password - Self-explanatory.
    userids - The unique identifiers that the user will have access to
    in order to retrieve encrypted data.

    Output:
    The function writes the username, hashed password, userids, a TOTP
    key and a generated API key to u_file as specified in the
    configuration above.
    """
    if exists(u_file):
        try:
            user_file = open(u_file, 'r', encoding='ascii')
        except PermissionError:
            print('Unable to open the file.  Check permissions.')
            exit(1)
        user_check = DictReader(user_file)
        for line in user_check:
            if username == line['username']:
                print('User already exists.  Exiting.')
                exit(1)
        user_file.close()
    else:
        pass
    if validate_un(username):
        # Setting file info.
        f_headers = [
            'username', 'password', 'userids', 'apikey', 'totp', 'fl_tstamp',
            'fl_count'
        ]
        if exists(u_file):
            pwd_file = open(u_file, 'a', newline='', encoding='ascii')
            writer = DictWriter(pwd_file, fieldnames=f_headers)
        else:
            pwd_file = open(u_file, 'w', newline='', encoding='ascii')
            writer = DictWriter(pwd_file, fieldnames=f_headers)
            writer.writeheader()
        # Converting input as needed.
        if validate_pw(password):
            pwd = password.encode(encoding='ascii')
            h_pwd = hashpw(b64encode(sha512(pwd).digest()), gensalt())
            apikey = sha256(b64encode(urandom(32))).hexdigest()
            totp = b32encode(urandom(16)).decode('ascii').strip('=')
        else:
            print('Password does not meet password requirements')
            exit(1)
        # Writing input to file.
        if ',' in userids:
            writer.writerow({
                'username': username,
                'password': h_pwd.decode(encoding='ascii'),
                'userids': userids.split(','),
                'apikey': apikey,
                'totp': totp,
                'fl_tstamp': 'None',
                'fl_count': '0'
            })
        else:
            writer.writerow({
                'username': username,
                'password': h_pwd.decode(encoding='ascii'),
                'userids': [userids],
                'apikey': apikey,
                'totp': totp,
                'fl_tstamp': 'None',
                'fl_count': '0'
            })
        pwd_file.close()
        return {'apikey': apikey, 'totp': totp}
    else:
        print('User name is not in a valid format.')
        exit(1)
Ejemplo n.º 54
    def convertFacts(self):

        finalFields = mappings.FACT_EXPORT_FIELDS

        shortMap, issueMap, orgMap = {}, {}, {}

        with open(self.expFolder + 'org_names.csv', 'r') as namesIn:
            reader = DictReader(namesIn)
            [
                orgMap.update({a['Short Name']: a['Full Name']})
                for a in list(reader)
            ]

        with open(self.expFolder + 'people_names.csv', 'r') as namesIn:
            reader = DictReader(namesIn)
            [
                shortMap.update({a['Short Name']: a['Full Name']})
                for a in list(reader)
            ]

        with open(self.expFolder + 'issue_names.csv', 'r') as issuesIn:
            reader = DictReader(issuesIn)
            [
                issueMap.update({a['Short Name']: a['Full Name']})
                for a in list(reader)
            ]

        with open(self.facts[0], 'r') as factsIn:
            reader = DictReader(factsIn)
            contents = [dict(a) for a in list(reader)]

        with open(self.expFolder + 'doc_shorts.csv', 'r') as docShortsIn:
            reader = DictReader(docShortsIn)
            docShorts = []
            [docShorts.append(a['Short Name']) for a in list(reader)]

        for row in contents:

            for field in mappings.FACT_DISCARD_FIELDS:
                row.pop(field, '')

        allFields = list(contents[0].keys())

        for field in allFields:
            if not any(entry[field] for entry in contents):
                for row in contents:
                    row.pop(field)

        for field in list(contents[0].keys()):
            if field not in mappings.FACT_BUILT_INS:
                fieldOut = open(self.impFolder + 'fact_custom_props.txt', 'a')
                fieldOut.write("{0}\n".format(field))
                fieldOut.close()

        for row in contents:

            charList = []

            for short, full in orgMap.items():
                if short in row['Fact Text']:
                    row['Fact Text'] = row['Fact Text'].replace(short, full)
                    charList.append(full)

            for short, full in shortMap.items():
                if short in row['Fact Text']:
                    row['Fact Text'] = row['Fact Text'].replace(short, full)
                    charList.append(full)

            for short, full in issueMap.items():
                row['Linked Issues'] = row['Linked Issues'].replace(
                    short, full)

            row['Issues'] = row.pop('Linked Issues').replace(',', ';')
            row['Characters'] = '; '.join(charList)
            row['Description'] = row.pop('Fact Text')
            row['Title'] = ' '.join(row['Description'].split()[:8])
            row['Undisputed'] = 'No'

            if row['Date & Time'] == 'TBD':
                row['Start Date'] = ''
                row['End Date'] = ''

            else:
                row['Start Date'] = fixDate(row['Date & Time'])

            row.pop('Date & Time')

            row['Author'] = ''
            row['Annotation Sources'] = ''

            sourceList = []

            for doc in docShorts:
                if doc in row['Source(s)']:
                    sourceList.append(doc)
                    row['Source(s)'] = row['Source(s)'].replace(doc, '')

            row['Source(s)'] = re.sub(r'\[.*\]', '', row['Source(s)'])

            row['Full-Text Sources'] = '; '.join(sourceList)
            row['Full-Text Sources'] += '; {0}'.format(
                row['Source(s)'].strip())

            row.pop('Source(s)')

        finalFields = list(contents[0].keys())

        factWriter = DictWriter(open(self.facts[1], 'w'),
                                lineterminator='\n',
                                fieldnames=finalFields)
        factWriter.writeheader()

        for row in contents:
            factWriter.writerow(row)
Ejemplo n.º 55
    def convertDocs(self):

        shortMap, issueMap, orgMap = {}, {}, {}
        issueLists = []
        issuePrefix = "DLI_"

        with open(self.expFolder + 'org_names.csv', 'r') as namesIn:
            reader = DictReader(namesIn)
            [
                orgMap.update({a['Short Name']: a['Full Name']})
                for a in list(reader)
            ]

        with open(self.expFolder + 'people_names.csv', 'r') as namesIn:
            reader = DictReader(namesIn)
            [
                shortMap.update({a['Short Name']: a['Full Name']})
                for a in list(reader)
            ]

        with open(self.expFolder + 'issue_names.csv', 'r') as issuesIn:
            reader = DictReader(issuesIn)
            [
                issueMap.update({a['Short Name']: a['Full Name']})
                for a in list(reader)
            ]

        with open(self.documents[0]) as rawIn:
            reader = DictReader(rawIn)
            contents = [dict(a) for a in list(reader)]

        for entry in contents:
            for field in mappings.DOC_DISCARD_FIELDS:
                entry.pop(field)

            issueLists.append(
                [a.strip() for a in entry['Linked Issues'].split(',')])

        issueMax = len(max(issueLists, key=len))

        allFields = list(contents[0].keys())

        for field in allFields:
            if not any(entry[field] for entry in contents):
                for row in contents:
                    row.pop(field)

        for entry in contents:

            if entry['Date'] in ['TBD', '']:
                entry['Date'] = ''
            else:
                entry['Date'] = fixDate(entry['Date'])

            for suffix in mappings.DOC_SUFFIXES:
                entry['Linked File'] = entry['Linked File'].replace(suffix, '')

            for short, full in shortMap.items():
                for key, value in entry.items():
                    if key == 'Linked File':
                        continue
                    else:
                        entry[key] = value.replace(short, full)

            for short, full in orgMap.items():
                for key, value in entry.items():
                    if key == 'Linked File':
                        continue
                    else:
                        entry[key] = value.replace(short, full)

        for field in list(contents[0].keys()):
            if field not in mappings.DOC_BUILT_INS:
                fieldOut = open(self.impFolder + 'doc_custom_props.txt', 'a')
                fieldOut.write("{0}\n".format(field))
                fieldOut.close()

        finalFields = list(contents[0].keys())
        finalFields.remove('Linked Issues')

        for i in range(0, issueMax):
            finalFields.append("{0}{1}".format(issuePrefix, i))

        for row in contents:

            if not row['Linked Issues'] == '':
                for index, issue in enumerate(row['Linked Issues'].split(',')):
                    try:
                        row.update({
                            '{0}{1}'.format(issuePrefix, index):
                            issueMap[issue.strip()]
                        })
                    except KeyError:  # in case there are dupe issues(replaced above) and org names
                        row.update({
                            '{0}{1}'.format(issuePrefix, index):
                            issue.strip()
                        })

            row.pop('Linked Issues')

            for key, value in row.items():
                if key == 'Linked File':
                    continue
                else:
                    row[key] = value.replace(',', ';')

        writer = DictWriter(open(self.documents[1], 'w'),
                            lineterminator='\n',
                            fieldnames=finalFields)
        docShortWriter = DictWriter(open(self.expFolder + 'doc_shorts.csv',
                                         'w'),
                                    lineterminator='\n',
                                    fieldnames=['Short Name'])

        writer.writeheader()
        docShortWriter.writeheader()

        for row in contents:
            writer.writerow(row)
            docShortWriter.writerow({'Short Name': row['Short Name']})
Ejemplo n.º 56
def main(corrected_csv,
         cluster_info,
         output_prefix,
         fasta_file=None,
         gff_file=None,
         faa_file=None):

    # read corrected CSV
    reader = DictReader(open(corrected_csv), delimiter='\t')
    for k in CORRECTED_CSV_FILELDS:
        if k not in reader.fieldnames:
            print("The following fields must exist in {0}!\n{1}".format(
                corrected_csv, "\n".join(CORRECTED_CSV_FILELDS)))
            sys.exit(-1)

    per_unique = {}  # tag -> record
    per_unique_count = Counter()  # tag -> number of duplicates
    per_pbid = defaultdict(lambda: {
        'gene': None,
        'transcript': None,
        'clusters': []
    })  # pbid --> list of clusters it is in
    for r in reader:
        tag = "{bc}-{umi}-{gene}".format(bc=r['BC_ed'],
                                         umi=r['UMI_ed'],
                                         gene=r['gene'])
        per_unique[tag] = r
        per_unique_count[tag] += 1

    # now link barcode to cell type, also PCR dup counts
    for tag in per_unique:
        c = cluster_info[per_unique[tag]['BC_ed']]
        rec = per_unique[tag]
        rec['cluster'] = c
        rec['num_dups'] = per_unique_count[tag]
        pbid = rec['pbid']
        if pbid in per_pbid: per_pbid[pbid]['clusters'].add(c)
        else:
            per_pbid[pbid] = {
                'gene': rec['gene'],
                'transcript': rec['transcript'],
                'clusters': set([c])
            }

    # write out de-dup CSV file
    with open(output_prefix + '.csv', 'w') as f:
        writer = DictWriter(f,
                            CORRECTED_CSV_FILELDS + ['cluster', 'num_dups'],
                            delimiter='\t',
                            extrasaction='ignore')
        writer.writeheader()
        keys = per_unique.keys()
        for k in sorted(keys):
            writer.writerow(per_unique[k])

    if fasta_file is not None:
        f_d = {}  # cluster --> file handle
        # writer pbid master file
        with open(output_prefix + '.fasta', 'w') as f:
            for r in SeqIO.parse(open(fasta_file), 'fasta'):
                if r.id in per_pbid:
                    newid = "{pbid}|{gene}|{transcript}|{clusters}".format(\
                            pbid=r.id,
                            gene=per_pbid[r.id]['gene'],
                            transcript=per_pbid[r.id]['transcript'],
                            clusters=";".join(per_pbid[r.id]['clusters']))
                    f.write(">{0}\n{1}\n".format(newid, r.seq))
                    for c in per_pbid[r.id]['clusters']:
                        if c not in f_d:
                            f_d[c] = open(
                                "{o}.{c}.fasta".format(o=output_prefix, c=c),
                                'w')
                        f_d[c].write(">{0}\n{1}\n".format(newid, r.seq))
        # close the per-cluster handles, mirroring the .faa and .gff branches below
        for handle in f_d.values():
            handle.close()

    if faa_file is not None:
        f_d = {}  # cluster --> file handle
        # writer pbid master file
        with open(output_prefix + '.faa', 'w') as f:
            for r in SeqIO.parse(open(faa_file), 'fasta'):
                if r.id in per_pbid:
                    newid = "{pbid}|{gene}|{transcript}|{clusters}".format(\
                            pbid=r.id,
                            gene=per_pbid[r.id]['gene'],
                            transcript=per_pbid[r.id]['transcript'],
                            clusters=";".join(per_pbid[r.id]['clusters']))
                    f.write(">{0}\n{1}\n".format(newid, r.seq))
                    for c in per_pbid[r.id]['clusters']:
                        if c not in f_d:
                            f_d[c] = open(
                                "{o}.{c}.faa".format(o=output_prefix, c=c),
                                'w')
                        f_d[c].write(">{0}\n{1}\n".format(newid, r.seq))
        for handle in f_d.values():
            handle.close()

    if gff_file is not None:
        f_d = {}  # cluster --> file handle
        # writer pbid master file
        with open(output_prefix + '.gff', 'w') as f:
            for r in collapseGFFReader(gff_file):
                if r.seqid in per_pbid:
                    newid = "{pbid}|{gene}|{transcript}|{clusters}".format(\
                            pbid=r.seqid,
                            gene=per_pbid[r.seqid]['gene'],
                            transcript=per_pbid[r.seqid]['transcript'],
                            clusters=";".join(per_pbid[r.seqid]['clusters']))
                    write_collapseGFF_format(f, r)
                    for c in per_pbid[r.seqid]['clusters']:
                        if c not in f_d:
                            f_d[c] = open(
                                "{o}.{c}.gff".format(o=output_prefix, c=c),
                                'w')
                        write_collapseGFF_format(f_d[c], r)
        for handle in f_d.values():
            handle.close()
Ejemplo n.º 57
def write_file(header, rows, file_path):
    with open(file_path, 'w') as archivo:
        writer = DictWriter(archivo, fieldnames=header)
        writer.writeheader()
        writer.writerows(rows)
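# A hedged usage sketch for write_file above; the header list must match the keys of
# each row dict, and the file path is only an example.
write_file(
    header=["name", "score"],
    rows=[{"name": "ana", "score": 10}, {"name": "luis", "score": 8}],
    file_path="scores.csv",
)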
Ejemplo n.º 58
def main(
    input_prefix: str = typer.Argument(...),
    output_prefix: str = typer.Argument(...),
    cpus: int = typer.Option(10, "--cpus", "-n", help="Number of CPUS"),
    version: bool = typer.Option(
        None,
        "--version",
        callback=version_callback,
        is_eager=True,
        help="Prints the version of the SQANTI3 package.",
    ),
) -> None:

    info = {}
    for r in SeqIO.parse(open(f"{input_prefix}.lima.clips"), "fasta"):
        zmw = r.id[: r.id.rfind("/")]
        e = int(r.id.split("/")[2].split("_")[1])
        if e < 100:
            info[zmw] = "F5" if r.description.split("bc:")[-1] == "0" else "R3"
    logger.info("Finished reading lima clips file.")

    num_records = len(info)
    chunk_size = (num_records // cpus) + (num_records % cpus)

    offset_start = 0
    input_bam = f"{input_prefix}.bam"
    pools = []
    onames = []
    while offset_start <= num_records:
        oname = f"{output_prefix}.{str(offset_start)}"
        p = Process(
            target=deconcat_worker,
            args=(input_bam, offset_start, offset_start + chunk_size, oname, info),
        )
        p.start()
        logger.info(
            f"Launching deconcat worker for records {offset_start}-{offset_start + chunk_size}..."
        )
        offset_start += chunk_size
        pools.append(p)
        onames.append(oname)

    for p in pools:
        p.join()

    logger.info("All deconcat workers done. Collecting results.")
    with open(f"{output_prefix}.csv", "w") as f_csv:
        writer = DictWriter(f_csv, CSV_FIELDS, delimiter=",")
        writer.writeheader()
        bams = []
        for oname in onames:
            bams.append(f"{oname}.bam")
            for r in DictReader(open(f"{oname}.csv"), delimiter=","):
                writer.writerow(r)

    logger.info("Merging bam files...")
    reader = pysam.AlignmentFile(bams[0], "rb", check_sq=False)
    with pysam.AlignmentFile(f"{output_prefix}.bam", "wb", header=reader.header) as f:
        for bam in bams:
            for r in pysam.AlignmentFile(bam, "rb", check_sq=False):
                x = pysam.AlignedSegment.from_dict(r.to_dict(), r.header)
                f.write(x)

    # pysam.merge(output_prefix+'.bam', *bams)

    for oname in onames:
        Path(f"{oname}.bam").unlink()
        Path(f"{oname}.csv").unlink()
    logger.info(f"Output written to: {output_prefix}.bam, {output_prefix}.csv")
Ejemplo n.º 59
def process_ser_log_parser(port):
    node_id = 0
    role = ''
    with Serial(port, BAUDRATE) as ser:
        # discover node_id
        while True:
            _, module, content = LogParser.parse_log_line(
                ser.readline().decode())
            if module is LogModule.MAIN:
                node_id = LogParser.parse_node_id(content)
                if node_id:
                    break

        # discover role
        while True:
            _, module, _ = LogParser.parse_log_line(ser.readline().decode())
            if module is LogModule.MASTER or module is LogModule.SLAVE:
                role = module
                print('node_id: {}, role: {}'.format(node_id, role.value))
                break

        # collect reports
        csv_file = '{}.csv'.format('master' if role is LogModule.MASTER else
                                   'slave-{}'.format(node_id))
        b64_file = '{}.b64'.format('master' if role is LogModule.MASTER else
                                   'slave-{}'.format(node_id))
        with open('{}-{}'.format('rss', csv_file), 'w') as rss_csv, \
             open('{}-{}'.format('acc', csv_file), 'w') as acc_csv, \
             open('{}-{}'.format('rss', b64_file), 'w') as rss_b64, \
             open('{}-{}'.format('acc', b64_file), 'w') as acc_b64:
            rss_writer = DictWriter(rss_csv,
                                    fieldnames=[
                                        'source_id', 'seq_num', 'time_stamp',
                                        'rss_local', 'rss_remote'
                                    ])
            acc_writer = DictWriter(
                acc_csv, fieldnames=[a.name.lower() for a in AccAxis])
            rss_writer.writeheader()
            acc_writer.writeheader()
            # write field names
            while True:
                level, module, content = LogParser.parse_log_line(
                    ser.readline().decode())
                if level is LogLevel.REP:
                    report_type, report_data = LogParser.parse_report(content)
                    assert (isinstance(report_type, ReportType))
                    if report_type is ReportType.RSS:
                        rss_b64.write('{}\n'.format(report_data))
                        rss_writer.writerow(
                            LogParser.parse_rss(b64_decode(report_data)))
                    else:
                        acc_b64.write('{}\n'.format(report_data))
                        acc_writer.writerows(
                            LogParser.parse_acc_xyz(b64_decode(report_data)))
                elif (module is LogModule.MASTER or module is LogModule.SLAVE
                      ) and content == 'Process started':
                    # restart detected, flush and terminate
                    rss_csv.flush()
                    acc_csv.flush()
                    rss_b64.flush()
                    acc_b64.flush()
                    break
Ejemplo n.º 60
    def csv_results(self, csv_file, histogram_size=None):
        histogram_size = histogram_size or 10
        bins = range(histogram_size)
        bins.insert(0, "idea")
        bins.extend(["avg", "std_dev"])
        dw = DictWriter(csv_file, bins, dialect='excel', delimiter=';')
        dw.writeheader()
        by_idea = self._gather_results()
        values = {
            votable_id: self.results_for(voting_results, histogram_size)
            for (votable_id, voting_results) in by_idea.iteritems()
        }
        idea_names = dict(self.db.query(Idea.id, Idea.short_title).filter(
            Idea.id.in_(by_idea.keys())))
        idea_names = {
            id: name.encode('utf-8') for (id, name) in idea_names.iteritems()}
        ordered_idea_ids = Idea.visit_idea_ids_depth_first(
            AppendingVisitor(), self.get_discussion_id())
        ordered_idea_ids = [id for id in ordered_idea_ids if id in values]
        for idea_id in ordered_idea_ids:
            base = values[idea_id]
            r = dict(enumerate(base['histogram']))
            r['idea'] = idea_names[idea_id]
            r['avg'] = base['avg']
            r['std_dev'] = base['std_dev']
            dw.writerow(r)