from csv import DictReader, DictWriter  # needed for the play-by-play readers and the output writer

from extractor import NewPbpExtractor
from nflEvaluation import classifierEvaluation
from nflClassifier import *
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in scikit-learn 0.20

# -------------------------------------------------- #
# Training
# data2: combine 2014, first 80% for train, 20% for test
data2013 = list(DictReader(open("pbp-2013.csv", 'r')))
data2014 = list(DictReader(open("pbp-2014.csv", 'r')))
data2015 = list(DictReader(open("pbp-2015.csv", 'r')))
dataList = [data2013, data2014, data2015]
dataName = ["2013", "2014", "2015"]

o = DictWriter(open("SVMoutput-sigmoid.csv", 'w'), [
    "dataName", "classifier", "percent", "score", "OmniScore",
    "Type1-A/A/Good", "Type2-A/B/Bad", "Type3-A/B/Good", "Type4-A/A/Bad"
])
o.writeheader()

# --------------------------------- #
for dataindex in range(len(dataList)):
    pbp2014 = NewPbpExtractor()
    pbp2014.buildFormationList(dataList[dataindex])
    feature, target = pbp2014.extract4Classifier(dataList[dataindex])
    dataLength = feature.shape[0]
    dataLength80 = round(dataLength * 0.8)
    # Slice the test set from dataLength80 (not dataLength80 + 1), otherwise
    # one sample falls between the train and test splits.
    X_train = feature[0:dataLength80, :]
    X_test = feature[dataLength80:dataLength, :]
    y_train = target[0:dataLength80]
    y_test = target[dataLength80:dataLength]
import sys
from csv import DictReader, DictWriter
from Bio.Seq import Seq

# read_group_info is assumed to be a helper defined elsewhere in this module.


def collate_gene_info(group_filename, csv_filename, class_filename, output_filename,
                      ontarget_filename=None, dedup_ORF_prefix=None,
                      no_extra_base=False, is_clustered=False):
    """
    <id>, <pbid>, <length>, <transcript>, <gene>, <category>,
    <ontarget Y|N|NA>, <ORFgroup NA|NoORF|groupID>, <UMI>, <BC>
    """
    FIELDS = ['id', 'pbid', 'length', 'transcript', 'gene', 'category',
              'ontarget', 'ORFgroup', 'UMI', 'UMIrev', 'BC', 'BCrev']

    group_info = read_group_info(group_filename)
    umi_bc_info = dict((r['id'], r)
                       for r in DictReader(open(csv_filename), delimiter='\t'))
    sqanti_info = dict((r['isoform'], r)
                       for r in DictReader(open(class_filename), delimiter='\t'))
    if ontarget_filename is not None:
        ontarget_info = dict((r['read_id'], r)
                             for r in DictReader(open(ontarget_filename), delimiter='\t'))

    if dedup_ORF_prefix is not None:
        dedup_ORF_info = {}  # seqid --> which group it belongs to (ex: PB.1.2 --> ORFgroup_PB.1_1)
        for line in open(dedup_ORF_prefix + '.group.txt'):
            group_id, members = line.strip().split('\t')
            for pbid in members.split(','):
                dedup_ORF_info[pbid] = group_id

    f = open(output_filename, 'w')
    writer = DictWriter(f, FIELDS, delimiter='\t')
    writer.writeheader()

    for ccs_id, pbid in group_info.items():
        if pbid not in sqanti_info:
            print("ignoring ID {0} because it is not in the classification file.".format(pbid),
                  file=sys.stderr)
            continue
        if is_clustered:
            # id: 1-ATCGAATGT-GCTTCTTTCACCTATCGATGATGGCTCAT-m64015_200531_015713/110297924/ccs
            _index, _umi, _bc, _ccs_id = ccs_id.split('-')
            ccs_id = _ccs_id
        if no_extra_base and (not is_clustered and umi_bc_info[ccs_id]['extra'] != 'NA'):
            print("ignoring ID {0} because of extra bases.".format(pbid), file=sys.stderr)
            continue
        rec = {'id': ccs_id, 'pbid': pbid}
        rec['length'] = sqanti_info[pbid]['length']
        rec['category'] = sqanti_info[pbid]['structural_category']
        rec['transcript'] = sqanti_info[pbid]['associated_transcript']
        rec['gene'] = sqanti_info[pbid]['associated_gene']
        if is_clustered:
            rec['UMI'] = _umi
            rec['BC'] = _bc
        else:
            rec['UMI'] = umi_bc_info[ccs_id]['UMI']
            rec['BC'] = umi_bc_info[ccs_id]['BC']
        rec['UMIrev'] = Seq(rec['UMI']).reverse_complement()
        rec['BCrev'] = Seq(rec['BC']).reverse_complement()
        if ontarget_filename is None:
            rec['ontarget'] = 'NA'
        else:
            rec['ontarget'] = 'Y' if ontarget_info[pbid]['genes'] != '' else 'N'
        if dedup_ORF_prefix is None:
            rec['ORFgroup'] = 'NA'
        elif pbid not in dedup_ORF_info:
            rec['ORFgroup'] = 'NoORF'
        else:
            rec['ORFgroup'] = dedup_ORF_info[pbid]
        writer.writerow(rec)
    f.close()
def update_status_file(self):
    with open(self.status_file, "w", encoding="utf8") as f:
        w = DictWriter(f, fieldnames=("file", "line"))
        w.writeheader()
        w.writerow({"file": self.last_file, "line": self.last_row})
from csv import DictWriter

with open('D:/TencentAds/TencentLookalike/MetaData/userFeature.csv', 'w') as fo:
    headers = ['uid', 'age', 'gender', 'marriageStatus', 'education',
               'consumptionAbility', 'LBS', 'interest1', 'interest2',
               'interest3', 'interest4', 'interest5', 'kw1', 'kw2', 'kw3',
               'topic1', 'topic2', 'topic3', 'appIdInstall', 'appIdAction',
               'ct', 'os', 'carrier', 'house']
    writer = DictWriter(fo, fieldnames=headers, lineterminator='\n')
    writer.writeheader()
    fi = open('D:/TencentAds/TencentLookalike/MetaData/userFeature.data', 'r')
    for t, line in enumerate(fi, start=1):
        # Each input line is '|'-separated; each field is 'name value1 value2 ...'
        line = line.replace('\n', '').split('|')
        userFeature_dict = {}
        for each in line:
            each_list = each.split(' ')
            userFeature_dict[each_list[0]] = ' '.join(each_list[1:])
        writer.writerow(userFeature_dict)
        if t % 100000 == 0:
            print(t)
    fi.close()
import json
from csv import DictWriter


def save_results(res_file, results_dict, fieldnames):
    print(json.dumps(results_dict))
    with open(res_file, 'a') as f:
        writer = DictWriter(f, fieldnames=fieldnames)
        writer.writerow(results_dict)
# Assumes module-level imports: os, cv2, numpy as np, pytesseract,
# csv.DictWriter, and a local ROI helper class.
def scanForm(self):
    pytesseract.pytesseract.tesseract_cmd = 'c:\\Program Files\\Tesseract-OCR\\tesseract.exe'
    imgQ = cv2.imread(self.queryPath)
    h, w, c = imgQ.shape
    obj = ROI(self.queryPath)
    roi = obj.RI()
    print(roi)

    with open('dataOutput.csv', 'a+', newline='') as f:
        fields = [i[3] for i in roi]
        csvWriter = DictWriter(f, fieldnames=fields)
        csvWriter.writeheader()  # note: in append mode this re-writes the header on every run

    # ORB -> Oriented FAST and Rotated BRIEF
    orb = cv2.ORB_create(1000)
    # kp1 -> key points: distinctive points in the image
    # des1 -> descriptors: the representation of the key points
    kp1, des1 = orb.detectAndCompute(imgQ, None)
    # imgKp1 = cv2.drawKeypoints(imgQ, kp1, None)
    # print(des1)

    path = self.dataPath
    myPicList = os.listdir(path)
    print(myPicList)
    for j, y in enumerate(myPicList):
        img = cv2.imread(path + "/" + y)
        kp2, des2 = orb.detectAndCompute(img, None)
        bf = cv2.BFMatcher(cv2.NORM_HAMMING)
        matches = sorted(bf.match(des2, des1), key=lambda x: x.distance)
        good = matches[:int(len(matches) * (self.percentage / 100))]
        imgMatch = cv2.drawMatches(img, kp2, imgQ, kp1, good[:100], None, flags=2)
        #########################################################################
        # item.distance: the distance between the descriptors; a lower distance
        #   indicates a better match.
        # item.trainIdx: the index of the descriptor in the list of train
        #   descriptors (here, the descriptors of imgQ).
        # item.queryIdx: the index of the descriptor in the list of query
        #   descriptors (here, the descriptors of img).
        # item.imgIdx: the index of the train image.
        #########################################################################
        # print(good[0], kp2[good[0].queryIdx].pt, kp1[good[0].trainIdx].pt)
        srcPoints = np.float32([kp2[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
        dstPoints = np.float32([kp1[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
        # print(srcPoints)
        M, _ = cv2.findHomography(srcPoints, dstPoints, cv2.RANSAC, 5.0)
        # print(M)
        imgScan = cv2.warpPerspective(img, M, (w, h))
        # imgScan = cv2.resize(imgScan, (w // 4, h // 4))
        # cv2.imshow(y, imgScan)
        imgShow = imgScan.copy()
        imgMask = np.zeros_like(imgShow)

        myData = []
        print(f'extracting data from form {j}')
        for x, r in enumerate(roi):
            imgCrop = imgScan[r[0][1]:r[1][1], r[0][0]:r[1][0]]
            if r[2] == 'text':
                s = pytesseract.image_to_string(imgCrop).replace("\n", "")
                s = s.replace("\x0c", "")
                print(f'{r[3]}:{s}')
                myData.append(s)
            if r[2] == 'box':
                imgGray = cv2.cvtColor(imgCrop, cv2.COLOR_BGR2GRAY)
                # inverse threshold -> bright regions give zero, dark regions give one
                imgThresh = cv2.threshold(imgGray, 170, 255, cv2.THRESH_BINARY_INV)[1]
                totalPixels = cv2.countNonZero(imgThresh)
                # print(r[3], totalPixels)
                totalPixels = 1 if totalPixels > self.pixelThreshold else 0
                print(f'{r[3]}:{totalPixels}')
                myData.append(totalPixels)
            cv2.putText(imgShow, str(myData[x]), (r[0][0], r[0][1]),
                        cv2.FONT_HERSHEY_PLAIN, 2.5, (0, 0, 255), 4)

        with open('dataOutput.csv', 'a+', newline='') as f:
            fields = [i[3] for i in roi]
            csvWriter = DictWriter(f, fieldnames=fields)
            print('my data is ', myData)
            dic = {}
            for i, d in enumerate(myData):
                dic[fields[i]] = d
            csvWriter.writerow(dic)

        imgShow = cv2.resize(imgShow, (w // 4, h // 4))
        cv2.imshow('final', imgShow)
        print(myData)
    cv2.waitKey(0)
# Excerpt from a model-evaluation loop: test, X_train, y_train, index, dim,
# vecinos, c, t0, field_names and the sklearn/pickle imports are defined
# earlier in the script.
X_test = test['X']
y_test = test['Y']

print(y_train)
print(list(set(y_train)))
print(index)
print("Model with dim=" + str(dim) + ", vecinos=" + str(vecinos) + " and C=" + str(c))

clf = LogisticRegression(C=c, max_iter=1000000)
clf.fit(preprocessing.scale(X_train), y_train)
y_pred = clf.predict(preprocessing.scale(X_test))

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
print(accuracy_score(y_test, y_pred))

filename = 'lr_%s_%s.pckl' % (str(dim), str(c))
elapsed_time = perf_counter() - t0
print("Time " + str(elapsed_time))
pickle.dump(clf, open(filename, 'wb'), protocol=4)

result = {
    "Dimension": dim,
    "Vecinos": vecinos,
    "C": c,
    "Accuracy": accuracy_score(y_test, y_pred)
}
with open("lriso.csv", "a+", newline='') as file:
    dict_writer = DictWriter(file, fieldnames=field_names)
    dict_writer.writerow(result)
# Assumes module-level imports: base64, datetime, io.StringIO, and csv.DictWriter.
def export_csv_file(self, product_templates):
    """
    Export the Odoo products in CSV file format.
    :param product_templates: Product templates to export.
    @author: Nilesh Parmar @Emipro Technologies Pvt. Ltd on date 04/11/2019
    """
    buffer = StringIO()
    delimiter = ","
    field_names = ["template_name", "product_name", "product_default_code",
                   "shopify_product_default_code", "product_description",
                   "PRODUCT_TEMPLATE_ID", "PRODUCT_ID", "CATEGORY_ID"]
    csvwriter = DictWriter(buffer, field_names, delimiter=delimiter)
    csvwriter.writeheader()
    rows = []
    for template in product_templates:
        if len(template.attribute_line_ids) > 3:
            continue
        if len(template.product_variant_ids.ids) == 1 and not template.default_code:
            continue
        for product in template.product_variant_ids.filtered(
                lambda variant: variant.default_code):
            rows.append({
                "PRODUCT_TEMPLATE_ID": template.id,
                "template_name": template.name,
                "CATEGORY_ID": template.categ_id.id,
                "product_default_code": product.default_code,
                "shopify_product_default_code": product.default_code,
                "PRODUCT_ID": product.id,
                "product_name": product.name,
                "product_description": product.description or None,
            })

    if not rows:
        raise Warning(
            "No data found to be exported.\n\nPossible Reasons:\n - Number of "
            "attributes are more than 3.\n - SKU(s) are not set properly.")
    csvwriter.writerows(rows)
    buffer.seek(0)
    file_data = buffer.read().encode()
    self.write({
        "datas": base64.encodebytes(file_data),  # encodestring was removed in Python 3.9
        "file_name": "Shopify_export_product"
    })
    # Timestamp uses '_' and '-': '/' and ':' are not safe in file names.
    return {
        "type": "ir.actions.act_url",
        "url": "web/content/?model=shopify.prepare.product.for.export.ept"
               "&id=%s&field=datas&download=true&filename=%s.csv" % (
                   self.id,
                   self.file_name + str(datetime.now().strftime("%d_%m_%Y-%H_%M_%S"))),
        "target": self,
    }
if 'map q' in line:
    print(line)
    words = line.split()
    reg_index_pair = regex.match(words[2])
    register = reg_index_pair.group(1)
    index = int(reg_index_pair.group(2))
    reg_map.append((register, index))
    outcome[register] = 0

# print("reg_map = ")
# print(reg_map)

with open(sys.argv[2], 'a+') as csvfile:  # open for appending
    writer = DictWriter(csvfile, fieldnames=list(outcome.keys()))
    csvfile.seek(0)  # 'a+' opens positioned at EOF, so rewind before checking;
    if not csvfile.read():  # otherwise the header would be re-written on every run
        writer.writeheader()

    # get the printouts from all the trials
    filenames = glob.glob(os.path.splitext(sys.argv[1])[0] + '.trial_*.out')
    print("filenames = ")
    print(filenames)
    for filename in filenames:
        with open(filename) as file:
            # Parse QX Simulator output format
            state_lines = False  # whether these lines are state amplitude lines
            first_basis = True   # whether this is the first state, such that we need to find the global phase
            phase_global = 0.0
            for line in file:
from csv import DictReader, DictWriter
from collections import defaultdict
from operator import itemgetter

MIN_SCORE = 5

with open('points.csv') as f:
    reader = DictReader(f)
    catpoints = {r['category']: int(r['points']) for r in reader}

points = defaultdict(int)
with open('contributions.csv') as f:
    reader = DictReader(f)
    for r in reader:
        points[r['company']] += catpoints[r['category']]

ranking = sorted(points.items(), key=itemgetter(1), reverse=True)

with open('ranking.csv', 'w') as f:
    writer = DictWriter(f, ['company', 'score'])
    writer.writeheader()
    writer.writerows({'company': c, 'score': s}
                     for (c, s) in ranking if s >= MIN_SCORE)
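# Illustrative input shapes for the ranking script above; these example rows
# are assumptions, not data from the source.
#
#   points.csv          contributions.csv
#   category,points     company,category
#   bugfix,5            Acme,bugfix
#   feature,10          Acme,feature
#                       Globex,bugfix
#
# With MIN_SCORE = 5, ranking.csv would then list Acme with 15 points and
# Globex with 5.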
def summarize_junctions(sample_dirs, sample_names, gff_filename,
                        output_prefix, genome_d=None, junction_known=None):
    """
    1. For each sample, read all the GFFs and store the junction information (both 0-based).
    """
    # (chr, strand) --> (donor, acceptor) --> samples it shows up in (can appear more than once)
    junc_by_chr_strand = defaultdict(lambda: defaultdict(lambda: []))

    for sample_name, d in sample_dirs.items():
        for r in GFF.collapseGFFReader(os.path.join(d, gff_filename)):
            n = len(r.ref_exons)
            if n == 1:
                continue  # ignore single-exon transcripts
            for i in range(n - 1):
                donor = r.ref_exons[i].end - 1  # make it 0-based
                accep = r.ref_exons[i + 1].start  # start is already 0-based
                junc_by_chr_strand[r.chr, r.strand][donor, accep].append(sample_name)

    # write junction report
    f1 = open(output_prefix + '.junction.bed', 'w')
    f1.write("track name=junctions description=\"{0}\" useScore=1\n".format(output_prefix))

    JUNC_DETAIL_FIELDS = ['chr', 'left', 'right', 'strand', 'num_transcript',
                          'num_sample', 'genome', 'annotation', 'label']

    with open(output_prefix + '.junction_detail.txt', 'w') as f:
        writer = DictWriter(f, JUNC_DETAIL_FIELDS, delimiter='\t')
        writer.writeheader()
        keys = list(junc_by_chr_strand.keys())
        keys.sort()
        for _chr, _strand in keys:
            v = junc_by_chr_strand[_chr, _strand]
            v_keys = list(v.keys())
            v_keys.sort()
            labels = cluster_junctions(v_keys)
            for i, (_donor, _accep) in enumerate(v_keys):
                rec = {'chr': _chr,
                       'left': _donor,
                       'right': _accep,
                       'strand': _strand,
                       'num_transcript': len(v[_donor, _accep]),
                       'num_sample': len(set(v[_donor, _accep]))}
                f1.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\n".format(
                    _chr, _donor, _accep + 1, output_prefix,
                    len(v[_donor, _accep]), _strand))
                # if a genome is given, write the donor-acceptor site
                if genome_d is None or _chr not in genome_d:
                    rec['genome'] = 'NA'
                else:
                    up = genome_d[_chr][_donor + 1:_donor + 3]
                    down = genome_d[_chr][_accep - 2:_accep]
                    if _strand == '+':
                        rec['genome'] = "{0}-{1}".format(str(up.seq).upper(),
                                                         str(down.seq).upper())
                    else:
                        rec['genome'] = "{0}-{1}".format(
                            str(down.reverse_complement().seq).upper(),
                            str(up.reverse_complement().seq).upper())
                # if an annotation is given, check whether the junction matches it
                if junction_known is None:
                    rec['annotation'] = 'NA'
                elif (_chr, _strand) in junction_known and (_donor, _accep) in junction_known[_chr, _strand]:
                    rec['annotation'] = 'Y'
                else:
                    rec['annotation'] = 'N'
                rec['label'] = "{c}_{s}_{lab}".format(c=_chr, s=_strand, lab=labels[i])
                writer.writerow(rec)
    f1.close()
    return junc_by_chr_strand
        escritor_csv.writerow([filme, genero, duracao])  # Puts each value under its matching header column.
    else:
        print('Encerrado.')

# NOTE: every time the write runs, the header is written along with it,
# duplicating it in the file (this happens when using the 'a' append mode).
"""

# DictWriter
from csv import DictWriter

with open('filmes2.csv', 'a+', encoding='UTF-8') as arq:
    cabecalho = ['Título', 'Gênero', 'Duração']
    escritor_csv = DictWriter(arq, fieldnames=cabecalho)  # Create the writer with the header fields.
    escritor_csv.writeheader()  # Insert the header row.

    filme = None
    while filme != 'sair':
        filme = str(input('Título: '))
        if filme != 'sair':
            genero = str(input('Gênero: '))
            duracao = int(input('Duração em minutos: '))
            escritor_csv.writerow({
                'Título': filme,
                'Gênero': genero,
                'Duração': duracao
            })  # Each key must be written exactly as declared in the header above.
from csv import DictReader, DictWriter

import numpy as np
import argparse
import pprint

data = list(DictReader(open("pbp-2015.csv", 'r')))

with open("pbp-2015New.csv", "w") as csvFile:
    fieldnames = [
        "GameId", "GameDate", "Quarter", "Minute", "Second", "OffenseTeam",
        "DefenseTeam", "Down", "ToGo", "YardLine", "SeriesFirstDown",
        "Description", "SeasonYear", "Yards", "Formation", "PlayType",
        "IsRush", "IsPass", "IsIncomplete", "IsTouchdown", "PassType",
        "IsSack", "IsChallenge", "IsChallengeReversed", "IsInterception",
        "IsFumble", "IsPenalty", "IsTwoPointConversion",
        "IsTwoPointConversionSuccessful", "RushDirection", "YardLineFixed",
        "YardLineDirection", "IsPenaltyAccepted", "PenaltyTeam", "IsNoPlay",
        "PenaltyType", "PenaltyYards", "HomeTeam", "VisitingTeam",
        "HomeTeamFinalScore", "VisitingTeamFinalScore"
    ]
    writer = DictWriter(csvFile, fieldnames=fieldnames, extrasaction='ignore')
    writer.writeheader()
    for play in data:
        # extrasaction='ignore' already drops columns missing from fieldnames,
        # so these deletions are belt-and-braces.
        del play["NextScore"]
        del play["IsMeasurement"]
        del play["Challenger"]
        del play["TeamWin"]
        writer.writerow(play)
import datetime as dt
import logging
import re
from csv import DictReader, DictWriter

now = dt.datetime.utcnow()
# Note: at WARNING level, the logging.debug() calls below emit nothing;
# lower this to logging.DEBUG to see the per-line cleaning messages.
logging.basicConfig(filename='ATSDTA_ATSP600_clean' + str(now) + '.log',
                    level=logging.WARNING)

pnum = re.compile(r'^P\d{3}$')
tickmarks = re.compile(r"^`+$")

CSVFILE2013 = '../data/JeffersonCo/JeffcoData Nov 2013/ATSDTA_ATSP600.txt'
CSVFILECLEAN2013 = '../data/JeffersonCo/JeffcoData Nov 2013/CLEANED_ATSDTA_ATSP600.csv'
#CSVFILECLEAN2013 = '/Users/vmd/GitHub/Jeffco-Properties/Data/JeffersonCo/Datasets/test1.csv'

with open(CSVFILE2013, 'r', encoding='utf-8', errors='ignore', newline='') as csvread:
    reader = DictReader(csvread)
    with open(CSVFILECLEAN2013, 'w') as csvwrite:
        writer = DictWriter(csvwrite, delimiter=',', fieldnames=reader.fieldnames)
        writer.writeheader()
        for line in reader:
            for k, i in line.items():
                line[k] = i.strip(" ")
            if tickmarks.match(line['PRPZIP4']):
                line['PRPZIP4'] = ''
                logging.debug(' Cleaned "^`+$" match from line ' + line['SCH'] +
                              ' PRPZIP4 ' + str(line['PRPZIP4']))
            if pnum.match(line['TTD']):
                line['TTD'] = line['TTD'].strip('P')
                logging.debug(r' Cleaned "^P\d{3}$" match from line ' + line['SCH'] +
                              ' TTD ' + str(line['TTD']))
            if pnum.match(line['ATD']):
                line['ATD'] = line['ATD'].strip('P')
                logging.debug(r' Cleaned "^P\d{3}$" match from line ' + line['SCH'] +
                              ' ATD ' + str(line['ATD']))
            if line['ALTATD'] == 'N99':
                line['ALTATD'] = ''
            writer.writerow(line)  # write the cleaned row back out
import os
import gzip
from csv import DictReader, DictWriter
from Bio import SeqIO


def collect_cluster_results(group_csv, out_dir, output_prefix,
                            use_BC=False, indices_to_use=None):
    """
    <index>,<UMI>,<locus>,<transcript or NA>,<ccs_id>

    HQ/LQ transcript IDs go from transcript/0 --> 1-TCAAGGTC-transcript/0;
    however, singletons (there shouldn't be many post-cluster) keep the CCS ID, which is ok.
    """
    f_out_rep = open(output_prefix + '.clustered_transcript.rep.fasta', 'w')
    f_out_not = open(output_prefix + '.clustered_transcript.not_rep.fasta', 'w')
    f_csv = open(output_prefix + '.clustered_transcript.csv', 'w')
    writer = DictWriter(f_csv,
                        fieldnames=['index', 'UMI', 'BC', 'locus', 'cluster', 'ccs_id'],
                        delimiter=',')
    #writer.writeheader()

    bad = []
    for r in DictReader(open(group_csv), delimiter=','):
        index = r['index']
        if indices_to_use is not None and index not in indices_to_use:
            continue
        umi_key = r['UMI']
        if use_BC:
            umi_key += '-' + r['BC']
        members = list(set(r['members'].split(',')))
        if len(members) == 1:
            # singleton, no directory, skip
            continue
        d = os.path.join(out_dir, index, umi_key)
        hq = os.path.join(d, 'output.hq.fasta.gz')
        lq = os.path.join(d, 'output.lq.fasta.gz')
        single = os.path.join(d, 'output.singletons.fasta.gz')
        report = os.path.join(d, 'output.cluster_report.csv')
        if not os.path.exists(d):
            print("{0} does not exist! DEBUG mode, ok for now".format(d))
            bad.append(d)
            continue
        if not os.path.exists(report):
            print("{0} does not exist! DEBUG mode, ok for now".format(report))
            bad.append(report)
            continue

        hq_count = 0
        if os.path.exists(hq):
            with gzip.open(hq, 'rt') as handle:
                for seqrec in SeqIO.parse(handle, 'fasta'):
                    if hq_count == 0:
                        f_out_rep.write(">{i}-{u}-{n}\n{s}\n".format(
                            i=index, u=umi_key, n=seqrec.id, s=seqrec.seq))
                    else:
                        f_out_not.write(">{i}-{u}-{n}\n{s}\n".format(
                            i=index, u=umi_key, n=seqrec.id, s=seqrec.seq))
                    hq_count += 1
        if os.path.exists(lq):
            with gzip.open(lq, 'rt') as handle:
                for seqrec in SeqIO.parse(handle, 'fasta'):
                    f_out_not.write(">{i}-{u}-{n}\n{s}\n".format(
                        i=index, u=umi_key, n=seqrec.id, s=seqrec.seq))

        info = {'index': index,
                'UMI': r['UMI'],
                'BC': r['BC'] if use_BC else 'NA',
                'locus': r['locus']}
        for ccs_rec in DictReader(open(report), delimiter=','):
            # cluster_id,read_id,read_type
            # transcript/0,m64012_191109_035807/102828567/ccs,FL
            # transcript/0,m64012_191109_035807/121700628/ccs,FL
            info['cluster'] = ccs_rec['cluster_id']
            info['ccs_id'] = ccs_rec['read_id']
            writer.writerow(info)

        # NOTE: singletons are not part of output.cluster_report.csv
        info['cluster'] = 'NA'
        if os.path.exists(single):
            with gzip.open(single, 'rt') as handle:
                for seqrec in SeqIO.parse(handle, 'fasta'):
                    f_out_not.write(">{i}-{u}-{n}\n{s}\n".format(
                        i=index, u=umi_key, n=seqrec.id, s=seqrec.seq))
                    info['ccs_id'] = seqrec.id
                    writer.writerow(info)

    f_out_not.close()
    f_out_rep.close()
    f_csv.close()
    return bad, f_out_rep.name, f_csv.name
from csv import DictWriter


def append_row(filename, dict_of_elem):
    field_names = dict_of_elem.keys()
    with open(filename, 'a+', newline='') as write_obj:
        dict_writer = DictWriter(write_obj, fieldnames=field_names)
        dict_writer.writerow(dict_of_elem)
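# append_row never writes a header, so a brand-new file starts with bare data
# rows. A minimal sketch of one way to guard against that; the
# os.path.getsize check and the helper name are assumptions, not part of the
# original helper.
import os


def append_row_with_header(filename, dict_of_elem):
    # Write the header only when the file is new or still empty.
    write_header = not os.path.isfile(filename) or os.path.getsize(filename) == 0
    with open(filename, 'a+', newline='') as write_obj:
        dict_writer = DictWriter(write_obj, fieldnames=list(dict_of_elem.keys()))
        if write_header:
            dict_writer.writeheader()
        dict_writer.writerow(dict_of_elem)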
def create_csv(csv_path, headers):
    """Creates the CSV file with the headers (must be a list)."""
    with open(csv_path, 'w') as csv_file:
        writer = DictWriter(csv_file, fieldnames=headers)
        writer.writeheader()
def _save_to_csv(quotes):
    with open('quotes.csv', 'w') as file:
        DW_object = DictWriter(file, fieldnames=quotes[0].keys())
        DW_object.writeheader()
        DW_object.writerows(quotes)
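# Hypothetical usage of _save_to_csv: `quotes` is assumed to be a non-empty
# list of dicts sharing the same keys (e.g. scraped quote records).
quotes = [
    {'text': 'To be or not to be', 'author': 'Shakespeare'},
    {'text': 'I think, therefore I am', 'author': 'Descartes'},
]
_save_to_csv(quotes)  # writes quotes.csv with a header row and two data rows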
# catsfile = file.read()
# print(catsfile)

print(" -- reader() from ** written cats.csv -- ")
from csv import reader, DictWriter

with open("cats.csv") as file:
    csv_reader = reader(file)
    # print(list(csv_reader))  # option 1
    for row in csv_reader:  # option 2
        print(row)

print("-----------DictWriter--------------------")
# Version using DictWriter
with open("cats2.csv", "w") as file:
    headers = ["Name", "Breed", "Age"]
    csv_writer = DictWriter(file, fieldnames=headers)
    csv_writer.writeheader()
    csv_writer.writerow({"Name": "Garfield", "Breed": "Orange Tabby", "Age": 10})

with open("cats2.csv", "r") as file:
    lines = file.readlines()
    print(lines)

print("-------------------")

with open("cats2.csv", "r") as file:
    result = file.read()
    print(result)
import os
import random
from json import load
from csv import DictWriter

import numpy
from deap import base, creator, tools

# Assumed local helpers from the surrounding project: BASE_DIR, evalVRPMS,
# cxPartialyMatched, mutInverseIndexes, ind2route, printRoute,
# makeDirsForFile, and exist.


def gaVRPMS(instName, unitCost, initCost, waitCost, delayCost,
            lightUnitCost, lightInitCost, lightWaitCost, lightDelayCost,
            indSize, popSize, cxPb, mutPb, NGen,
            exportCSV=False, customizeData=False):
    if customizeData:
        jsonDataDir = os.path.join(BASE_DIR, 'data', 'json_customize')
    else:
        jsonDataDir = os.path.join(BASE_DIR, 'data', 'json')
    jsonFile = os.path.join(jsonDataDir, '%s.json' % instName)
    with open(jsonFile) as f:
        instance = load(f)

    creator.create('FitnessMax', base.Fitness, weights=(1.0,))
    creator.create('Individual', list, fitness=creator.FitnessMax)
    toolbox = base.Toolbox()

    # Attribute generator
    toolbox.register('indexes', random.sample, range(1, indSize + 1), indSize)
    # Structure initializers
    toolbox.register('individual', tools.initIterate, creator.Individual,
                     toolbox.indexes)
    toolbox.register('population', tools.initRepeat, list, toolbox.individual)
    # Operator registering
    toolbox.register('evaluate', evalVRPMS, instance=instance,
                     unitCost=unitCost, initCost=initCost,
                     waitCost=waitCost, delayCost=delayCost,
                     lightUnitCost=lightUnitCost, lightInitCost=lightInitCost,
                     lightWaitCost=lightWaitCost, lightDelayCost=lightDelayCost)
    toolbox.register('select', tools.selRoulette)
    toolbox.register('mate', cxPartialyMatched)
    toolbox.register('mutate', mutInverseIndexes)

    pop = toolbox.population(n=popSize)

    # Results holder for exporting results to a CSV file
    csvData = []
    print('Start of evolution')

    # Evaluate the entire population
    fitnesses = list(map(toolbox.evaluate, pop))
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit
    print('  Evaluated %d individuals' % len(pop))

    # Begin the evolution
    for g in range(NGen):
        print('-- Generation %d --' % g)
        # Select the next generation's individuals
        offspring = toolbox.select(pop, len(pop))
        # Clone the selected individuals
        offspring = list(map(toolbox.clone, offspring))
        # Apply crossover and mutation to the offspring
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < cxPb:
                toolbox.mate(child1, child2)
                del child1.fitness.values
                del child2.fitness.values
        for mutant in offspring:
            if random.random() < mutPb:
                toolbox.mutate(mutant)
                del mutant.fitness.values
        # Evaluate the individuals with an invalid fitness
        invalidInd = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = map(toolbox.evaluate, invalidInd)
        for ind, fit in zip(invalidInd, fitnesses):
            ind.fitness.values = fit
        print('  Evaluated %d individuals' % len(invalidInd))
        # The population is entirely replaced by the offspring
        pop[:] = offspring
        # Record statistics into the logbook
        stats = tools.Statistics(key=lambda ind: ind.fitness.values)
        stats.register('avg', numpy.mean)
        stats.register('std', numpy.std)
        stats.register('min', numpy.min)
        stats.register('max', numpy.max)
        record = stats.compile(pop)
        print(record)
        # Gather all the fitnesses in one list and print the stats
        fits = [ind.fitness.values[0] for ind in pop]
        length = len(pop)
        mean = sum(fits) / length
        sum2 = sum(x * x for x in fits)
        std = abs(sum2 / length - mean ** 2) ** 0.5
        print('  Min %s' % min(fits))
        print('  Max %s' % max(fits))
        print('  Avg %s' % mean)
        print('  Std %s' % std)
        # Write data to holders for exporting results to a CSV file
        if exportCSV:
            csvRow = {
                'generation': g,
                'evaluated_individuals': len(invalidInd),
                'min_fitness': min(fits),
                'max_fitness': max(fits),
                'avg_fitness': mean,
                'std_fitness': std,
                'avg_cost': 1 / mean,
            }
            csvData.append(csvRow)

    print('-- End of (successful) evolution --')
    bestInd = tools.selBest(pop, 1)[0]
    print('Best individual: %s' % bestInd)
    print('Fitness: %s' % bestInd.fitness.values[0])
    printRoute(ind2route(bestInd, instance))
    print('Total cost: %s' % (1 / bestInd.fitness.values[0]))

    if exportCSV:
        csvFilename = '%s_uC%s_iC%s_wC%s_dC%s_iS%s_pS%s_cP%s_mP%s_nG%s.csv' % (
            instName, unitCost, initCost, waitCost, delayCost,
            indSize, popSize, cxPb, mutPb, NGen)
        csvPathname = os.path.join(BASE_DIR, 'results', csvFilename)
        print('Write to file: %s' % csvPathname)
        makeDirsForFile(pathname=csvPathname)
        if not exist(pathname=csvPathname, overwrite=True):  # local utility
            with open(csvPathname, 'w') as f:
                fieldnames = ['generation', 'evaluated_individuals',
                              'min_fitness', 'max_fitness', 'avg_fitness',
                              'std_fitness', 'avg_cost']
                writer = DictWriter(f, fieldnames=fieldnames, dialect='excel')
                writer.writeheader()
                for csvRow in csvData:
                    writer.writerow(csvRow)
import ROOT
from csv import DictWriter

# fname is assumed to be defined earlier in the script.
f = ROOT.TFile(fname)
tree = f.DecayTree

attrs = {
    'mass': lambda tree: tree.D0_MM,
    'decaytime': lambda tree: tree.D0_TAU * 1000.,
    'pt': lambda tree: tree.D0_PT,
    'ipchi2': lambda tree: tree.D0_MINIPCHI2,
}
ranges = {
    'mass': (1815, 1915),
    'decaytime': (0.15, 6.),
}

with open('D0KpiData.csv', 'w') as fout:
    writer = DictWriter(fout, attrs.keys())
    writer.writeheader()
    for i in range(tree.GetEntries()):
        tree.GetEntry(i)
        vals = {attr: func(tree) for attr, func in attrs.items()}
        # Keep only entries whose values fall inside every configured range
        inrange = True
        for attr, (vmin, vmax) in ranges.items():
            if vals[attr] < vmin or vals[attr] > vmax:
                inrange = False
                break
        if not inrange:
            continue
        writer.writerow(vals)
# Assumes module-level imports: os, PyCapture2, and csv.DictWriter.
def openVideoWriter(self, filename, encoder=None, overwrite=False, quality=75,
                    bitrate=1000000, img_size=None, csv_timestamps=True,
                    embed_image_info=['timestamp']):
    """
    Opens a video writer. Subsequent calls to .get_image() will additionally
    write those frames out to the file.

    Parameters
    ----------
    filename : str
        Path to desired output file. If the extension is omitted it will be
        inferred from <encoder> (if specified).
    encoder : str { 'AVI' | 'MJPG' | 'H264' } or None, optional
        Output encoder to use. If None, it will automatically be set to
        'AVI' if filename ends with an '.avi' extension, 'H264' if filename
        ends with an '.mp4' extension, and an error will be raised for other
        extensions. Note that the 'MJPG' and 'H264' formats permit
        additional arguments to be passed. The default is None.
    overwrite : bool, optional
        If False and the output file already exists, an error will be
        raised. The default is False.
    quality : int, optional
        Value between 0-100 determining output quality. Only applicable for
        the MJPG format. The default is 75.
    bitrate : int, optional
        Bitrate to encode at. Only applicable for the H264 format.
        The default is 1000000.
    img_size : (W, H) tuple of ints, optional
        Image resolution. Only applicable for the H264 format. If not
        given, will attempt to determine it from the camera's video mode,
        but this might not work. The default is None.
    csv_timestamps : bool, optional
        If True, timestamps for each frame will be saved to a csv file
        corresponding to the output video file. The default is True.
        Note that to get 1394 cycle timestamps (more accurate), the
        timestamp property MUST be enabled within the embedded image info.
    embed_image_info : list or 'all' or None, optional
        List of properties to embed within the top-left image pixels. Note
        that the video MUST be monochrome for the pixel values to be
        usable. Available properties are: [timestamp, gain, shutter,
        brightness, exposure, whiteBalance, frameCounter, strobePattern,
        ROIPosition]. Alternatively, specify 'all' to use all available
        properties, or None to not embed any properties. The timestamp
        property MUST be enabled to get 1394 cycle timestamps in the CSV
        file (if applicable), regardless of whether the embedded
        information itself is going to be used. The default is to embed
        timestamps.
    """
    # Try to auto-determine file format if unspecified
    if encoder is None:
        ext = os.path.splitext(filename)[1].lower()  # case insensitive
        if ext == '.avi':
            encoder = 'AVI'
        elif ext == '.mp4':
            encoder = 'H264'
        elif not ext:
            raise ValueError('Cannot determine the encoder automatically '
                             'without a file extension')
        else:
            raise ValueError('Cannot determine the encoder automatically '
                             f'from a {ext} extension')
    print(f'Recording using {encoder} encoder')

    encoder = encoder.upper()  # ensure case insensitive
    if encoder not in ['AVI', 'MJPG', 'H264']:
        raise ValueError("Encoder must be one of 'AVI', 'MJPG', or 'H264', "
                         f"but received {encoder}")

    # Auto-determine file extension if necessary
    if not os.path.splitext(filename)[1]:
        if encoder in ['AVI', 'MJPG']:
            filename += '.avi'
        elif encoder == 'H264':
            filename += '.mp4'

    # Without overwrite, error if file exists. The AVI writer sometimes
    # appends a bunch of zeros to the name, so check that too.
    if not overwrite:
        _filename, ext = os.path.splitext(filename)
        alt_filename = _filename + '-0000' + ext
        if os.path.isfile(filename) or os.path.isfile(alt_filename):
            raise OSError(f'Output file {filename} already exists')

    # Update camera to embed image info
    available_info = self.cam.getEmbeddedImageInfo().available
    prop_keys = [k for k in dir(available_info) if not k.startswith('__')]
    props = dict((k, False) for k in prop_keys)
    if embed_image_info:
        if not isinstance(embed_image_info, (list, tuple)):
            embed_image_info = [embed_image_info]
        if 'all' in embed_image_info:
            for k in prop_keys:
                props[k] = getattr(available_info, k)
        else:  # use specified values
            for k in embed_image_info:
                if k not in prop_keys:
                    raise KeyError("Embedded property must be one of "
                                   f"list({prop_keys}), but received '{k}'")
                elif not getattr(available_info, k):
                    raise ValueError(f"'{k}' embedded property not available")
                props[k] = True
    self.cam.setEmbeddedImageInfo(**props)

    # Open csv writer for timestamps?
    if csv_timestamps:
        csv_filename = os.path.splitext(filename)[0] + '.csv'
        if not overwrite and os.path.isfile(csv_filename):
            raise OSError(f'Timestamps file {csv_filename} already exists')
        self.csv_fd = open(csv_filename, 'w')
        fieldnames = ['seconds', 'microSeconds', 'cycleSeconds',
                      'cycleCount', 'cycleOffset']
        self.csv_writer = DictWriter(self.csv_fd, fieldnames,
                                     delimiter=',', lineterminator='\n')
        self.csv_writer.writeheader()

    # Initialise video writer, allocate to class
    self.video_writer = PyCapture2.FlyCapture2Video()

    # Open video file
    bytes_filename = filename.encode('utf-8')  # needs to be a bytes string
    if encoder == 'AVI':
        self.video_writer.AVIOpen(bytes_filename, self.fps)
    elif encoder == 'MJPG':
        self.video_writer.MJPGOpen(bytes_filename, self.fps, quality)
    elif encoder == 'H264':
        if img_size is None:
            if self.img_size is None:
                raise RuntimeError('Cannot determine image resolution')
            else:
                img_size = self.img_size
        W, H = img_size
        self.video_writer.H264Open(bytes_filename, self.fps, W, H, bitrate)

    # Success!
    self._video_writer_isOpen = True
def prep_results_file(res_file, fieldnames):
    with open(res_file, 'w') as f:
        writer = DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
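# Hypothetical usage pairing prep_results_file with the save_results helper
# shown earlier; the file name and field names here are illustrative.
fieldnames = ['run_id', 'accuracy']
prep_results_file('results.csv', fieldnames)  # write the header once
save_results('results.csv', {'run_id': 1, 'accuracy': 0.93}, fieldnames)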
# Excerpt: args (argparse), feat (featurizer module), x_train, y_train,
# labels, test, meta_info, and the sklearn/time imports are defined earlier
# in the script.

# Train classifier and do cross validation
lr = SGDClassifier(loss='log', penalty='l2', shuffle=True)
if args.cvt:
    print("Start cross validation...")
    # Legacy sklearn.cross_validation signature updated to the modern
    # sklearn.model_selection one (n_splits instead of a leading n argument).
    cv = ShuffleSplit(n_splits=args.cvt, test_size=args.cvp,
                      random_state=int(time.time()))
    scores = cross_val_score(lr, x_train, y_train, cv=cv,
                             scoring='accuracy', verbose=1)
    print(scores)
    print(scores.mean(), scores.std())

# Run test data
print("Run test set...")
x_test = feat.test_feature(test, meta_info)
lr.fit(x_train, y_train)
feat.show_top(lr, labels, args.top)
predictions = lr.predict(x_test)

o = DictWriter(open(args.output, 'w'), ['id', 'spoiler'])
o.writeheader()
for ii, pp in zip([x['id'] for x in test], predictions):
    d = {'id': ii, 'spoiler': labels[pp]}
    o.writerow(d)
print('Finish!')  # was a Python 2 print statement
def export_bids(raweeg=None, bids_paths=None, participants=None,
                overwrite=False, verbose=False):
    '''Export recordings in BIDS format.

    Args:
        raweeg : array
            EEG streams previously imported as MNE RawArray instances.
        bids_paths : array
            BIDS paths as MNE-BIDS BIDSPath instances.
        participants : array
            The participants in the recordings. Template available in the README.
        overwrite : bool
            Overwrite existing BIDS recordings.
        verbose : bool
            Show progress while exporting.

    Raises:
        ValueError: if no stream is specified in raweeg, or raweeg and
            bids_paths do not have the same length.

    See also:
        create_bids_path
    '''
    if raweeg is None or bids_paths is None:
        raise ValueError('You must enter EEG recording and BIDS path array parameters.')
    raweeg = [raweeg] if not isinstance(raweeg, list) else raweeg
    bids_paths = [bids_paths] if not isinstance(bids_paths, list) else bids_paths
    # Only wrap participants when actually given; the original compared the
    # list against the string 'None', which was always true.
    if participants is not None and not isinstance(participants, list):
        participants = [participants]
    if len(raweeg) != len(bids_paths):
        raise ValueError('BIDS path and eeg arrays must have the same length.')

    # Create BIDS
    for index, recording in enumerate(raweeg):
        if system() == 'Windows':
            temporal_file_path = gettempdir() + '\\dummy_raw.fif'
        else:
            temporal_file_path = gettempdir() + '/dummy_raw.fif'
        if path.exists(temporal_file_path):
            remove(temporal_file_path)
        # Temporary file storage because of package requirements
        recording.save(temporal_file_path)
        file_rec = io.Raw(fname=temporal_file_path)
        # Create BIDS structure with EEG data
        write_raw_bids(raw=file_rec, bids_path=bids_paths[index],
                       events_data=None, event_id=None, anonymize=None,
                       overwrite=overwrite, verbose=verbose)
        remove(temporal_file_path)
        print('Exported recording: ', recording.annotations.description)

    # Fill in the participants info file (skipped when no participant info is given)
    if participants is not None:
        for subject in participants:
            root_folder = r'%s' % subject['root']
            if system() == 'Windows':
                if root_folder[-1] == '\\':
                    root_folder = root_folder + 'participants.tsv'
                else:
                    root_folder = root_folder + '\\participants.tsv'
            else:
                if root_folder[-1] == '/':
                    root_folder = root_folder + 'participants.tsv'
                else:
                    root_folder = root_folder + '/participants.tsv'
            with open(root_folder, mode='r+', encoding='utf-8-sig') as tsvfile:
                reader = DictReader(tsvfile, dialect='excel-tab')
                data = []
                for par in reader:
                    for par2 in participants:
                        if par['participant_id'] == 'sub-' + par2['participant_id']:
                            par['age'] = par2['age']
                            par['sex'] = par2['sex']
                            par['hand'] = par2['hand']
                            break
                    data.append(par)
                tsvfile.seek(0)
                tsvfile.truncate()
                writer = DictWriter(tsvfile, dialect='excel-tab',
                                    fieldnames=['participant_id', 'age', 'sex', 'hand'])
                writer.writeheader()
                for row in data:
                    writer.writerow(row)
from sys import argv
from csv import DictReader, DictWriter

if len(argv) > 1:
    for filename in argv[1:]:
        try:
            with open(filename, mode="r", encoding="ISO-8859-1", newline="") as f:
                entries = [entry for entry in DictReader(f)]
                end = len(entries)
                # Swap the "Position" values pairwise so the reversed rows
                # keep their original position numbers.
                for i in range(end // 2):
                    entries[i]["Position"], entries[end - 1 - i]["Position"] = (
                        entries[end - 1 - i]["Position"],
                        entries[i]["Position"],
                    )
        except Exception as e:
            print(e)
        else:
            with open(filename, mode="w", encoding="ISO-8859-1", newline="") as f:
                writer = DictWriter(f, fieldnames=list(entries[0].keys()))
                writer.writeheader()
                writer.writerows(reversed(entries))
            print("[OK]", filename)
else:
    print("No files given")
def visit_schema(self, **_) -> None:
    self.classtab = DictWriter(
        sys.stdout,
        ['Class Name', 'Parent Class', 'YAML Class Name', 'Description',
         'Flags', 'Slot Name', 'YAML Slot Name', 'Range', 'Card',
         'Slot Description', 'URI'],
        dialect=self.dialect)
    self.classtab.writeheader()
import os
from csv import DictWriter

import dpkt
import numpy as np

# FlowSorter is assumed to be a local helper class from the surrounding project.


def sort_flows(read, localhost, output):
    print("Opening reader...")
    pcap = dpkt.pcapng.Reader(read)
    flow_sorter = FlowSorter(localhost=localhost)
    print("Sorting packets...")
    flow_sorter.sort(pcap)

    if not os.path.exists(output):
        try:
            os.mkdir(output)
        except IOError as e:
            print("Error creating directory: {}, {}".format(output, e))

    flows = flow_sorter.flows()
    flow_num = 0
    with open(os.path.join(output, 'flows.csv'), 'w') as flows_file:
        flows_fields = ['id', 'start_time', 'end_time', 'duration',
                        'src_ip', 'src_port', 'dst_ip', 'dst_port',
                        'total_bytes', 'total_message_bytes',
                        'avg_inter_arrival_time', 'median_inter_arrival_time',
                        'std_inter_arrival_time', 'num_packets',
                        'src_names', 'dst_names']
        packet_fields = ['timestamp', 'inter_arrival_time', 'seq', 'ack',
                         'flag_CWR', 'flag_ECE', 'flag_URG', 'flag_ACK',
                         'flag_PSH', 'flag_RST', 'flag_SYN', 'flag_FIN',
                         'window_size', 'urgent_pointer', 'tcp_header_size',
                         'tcp_payload_size', 'ip_header_size', 'ip_id',
                         'ip_tos', 'ip_df', 'ip_mf', 'ip_offset']
        flows_writer = DictWriter(flows_file, fieldnames=flows_fields)
        flows_writer.writeheader()
        flow_row = {}
        packet_row = {}
        for flow in sorted(flows.values(), key=lambda flow: flow.first_timestamp()):
            flow_row['id'] = flow_num
            flow_row['src_ip'] = str(flow.src_host().ip_addr())
            flow_row['src_port'] = flow.src_host().port()
            flow_row['src_names'] = flow.src_host().original_names()
            flow_row['dst_ip'] = str(flow.dst_host().ip_addr())
            flow_row['dst_port'] = flow.dst_host().port()
            flow_row['dst_names'] = flow.dst_host().original_names()
            flow_row['total_bytes'] = flow.total_bytes()
            flow_row['total_message_bytes'] = flow.total_message_bytes()
            inter_arrival_times = flow.inter_arrival_times()
            timestamps = flow.timestamps()
            # Inter-arrival statistics are undefined for single-packet flows
            flow_row['avg_inter_arrival_time'] = (
                inter_arrival_times.mean() if len(flow) > 1 else np.nan)
            flow_row['median_inter_arrival_time'] = (
                np.median(inter_arrival_times) if len(flow) > 1 else np.nan)
            flow_row['std_inter_arrival_time'] = (
                inter_arrival_times.std() if len(flow) > 1 else np.nan)
            flow_row['start_time'] = timestamps[0]
            flow_row['end_time'] = timestamps[-1]
            flow_row['duration'] = timestamps[-1] - timestamps[0]
            flow_row['num_packets'] = len(flow)
            flows_writer.writerow(flow_row)

            with open(os.path.join(output, 'flow_{}.csv'.format(flow_num)), 'w') as packets_file:
                packet_num = 0
                packet_writer = DictWriter(packets_file, packet_fields)
                packet_writer.writeheader()
                for timestamp, packet in flow:
                    packet_row['timestamp'] = timestamp
                    if packet_num == 0:
                        packet_row['inter_arrival_time'] = np.nan
                    else:
                        packet_row['inter_arrival_time'] = inter_arrival_times[packet_num - 1]
                    packet_row['seq'] = packet.data.seq
                    packet_row['ack'] = packet.data.ack
                    packet_row['flag_CWR'] = int(bool(packet.data.flags & dpkt.tcp.TH_CWR))
                    packet_row['flag_ECE'] = int(bool(packet.data.flags & dpkt.tcp.TH_ECE))
                    packet_row['flag_URG'] = int(bool(packet.data.flags & dpkt.tcp.TH_URG))
                    packet_row['flag_ACK'] = int(bool(packet.data.flags & dpkt.tcp.TH_ACK))
                    packet_row['flag_PSH'] = int(bool(packet.data.flags & dpkt.tcp.TH_PUSH))
                    packet_row['flag_RST'] = int(bool(packet.data.flags & dpkt.tcp.TH_RST))
                    packet_row['flag_SYN'] = int(bool(packet.data.flags & dpkt.tcp.TH_SYN))
                    packet_row['flag_FIN'] = int(bool(packet.data.flags & dpkt.tcp.TH_FIN))
                    packet_row['window_size'] = packet.data.win
                    packet_row['urgent_pointer'] = packet.data.urp
                    packet_row['tcp_payload_size'] = len(packet.data.data)
                    packet_row['tcp_header_size'] = len(packet.data) - len(packet.data.data)
                    packet_row['ip_header_size'] = packet.hl
                    packet_row['ip_id'] = packet.id
                    packet_row['ip_tos'] = packet.tos
                    packet_row['ip_df'] = packet.df
                    packet_row['ip_mf'] = packet.mf
                    packet_row['ip_offset'] = packet.offset
                    packet_writer.writerow(packet_row)
                    packet_num += 1
            flow_num += 1
# print("%.3f" % nltk.classify.accuracy(classifier, dev_test))
classifier.show_most_informative_features(200)
# classifier.prob_classify(featurize(name))

right = 0
total = len(dev_test)
for ii in dev_test:
    prediction = classifier.classify(ii[0])
    if prediction == ii[1]:
        right += 1
sys.stderr.write("Accuracy on dev: %f\n" % (float(right) / float(total)))

if testfile is None:
    sys.stderr.write("No test file passed; stopping.\n")
else:
    # Retrain on all data
    classifier = nltk.classify.NaiveBayesClassifier.train(dev_train + dev_test)

    # Read in the test section
    test = {}
    for ii in DictReader(testfile, delimiter='\t'):
        test[ii['id']] = classifier.classify(fe.features(ii['text']))

    # Write predictions
    o = DictWriter(outfile, ['id', 'pred'])
    o.writeheader()
    for ii in sorted(test):
        o.writerow({'id': ii, 'pred': test[ii]})
            out.append(v)

        # Remove the no-data value, if any
        if no_data:
            uniques.discard(n)

        # Export to a tab-delimited file
        if len(bands) > 1:
            outNm = re.sub(".txt", "_" + str(b) + ".txt", outTxt)
        else:
            outNm = outTxt
        with open(outNm, "w") as f:
            dict_writer = DictWriter(
                f,
                ['id'] + sorted(uniques,
                                key=lambda x: float(re.sub('^' + prefix, '', x))),
                extrasaction='ignore',
                delimiter="\t",
                restval="0")
            dict_writer.writeheader()
            for p in out:
                dict_writer.writerow(p)

if statistics:
    for b, s in zip(bands, stats):
        # Transform the result dictionary for the export
        out = []
        # Prepare a set to hold the unique values in the raster
        for k, v in s.items():
            v['id'] = k
            out.append(v)