def main():
    """Build and write the fGI feature file for every genome in ``db.txt``.

    The input directory and output directory come from ``getFilePath()``.
    From the input directory this reads ``fGI_stats.csv`` and
    ``Core_attfGI.csv`` (comma separated, ASCII), the genome listing
    ``db.txt`` (one genome per line) and the pickled dictionaries
    ``genomeCluster.dict`` and ``genomeLocus.dict``.  Each genome is given a
    running index starting at 3, in listing order, and is then passed through
    ``createfgiInsertDict`` -> ``createfGIFeatures`` -> ``writeFile``.
    """
    path, endPath = getFilePath()

    # Read every input inside a ``with`` block so no handle is left open.
    with open(path + "fGI_stats.csv", "rt", encoding="ascii") as handle:
        reader_fgi = list(csv.reader(handle, delimiter=","))
    with open(path + "Core_attfGI.csv", "rt", encoding="ascii") as handle:
        reader_core = list(csv.reader(handle, delimiter=","))
    with open(path + "db.txt", "r") as handle:
        genomeListing = list(handle)
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only point this script at input directories you trust.
    with open(path + "genomeCluster.dict", "rb") as handle:
        genomeClusterDict = pickle.load(handle)
    with open(path + "genomeLocus.dict", "rb") as handle:
        genomeLocusDict = pickle.load(handle)

    coreDict, fgiDict = createCoreClusterDict(reader_core)

    genomeIdDict = {}
    for index, genome2 in enumerate(genomeListing, 3):
        if "\n" in genome2:
            genome2 = genome2[0:-1]
        genomeIdDict[genome2] = index

    # Each genome is processed exactly once.  The original repeated the
    # three calls below one more time for the last genome (and raised
    # NameError when the listing was empty).
    for genome, genome_index in genomeIdDict.items():
        genomeDict = createfgiInsertDict(reader_fgi, genome)
        referenceList = createfGIFeatures(
            genomeDict, coreDict, fgiDict, genomeClusterDict, genomeLocusDict, genome, genome_index
        )
        writeFile(endPath, genome, referenceList)
def solve(self):
    """Estimate confidence bounds for the share of old population.

    Downloads two CSV tables, takes column 1 of the "old" table (skipping its
    first 3 rows) and column 4 of the "total" table (skipping its first 5
    rows), and converts them to percentages.  The mean and variance bounds
    are then computed with ``t.isf`` / ``chi2.isf`` using the hard-coded
    value 31.

    Returns:
        list: ``[[mean_lower, mean_upper], [std_lower, std_upper]]``; the
        same list is also printed.
    """
    url_old = "http://py.mooctest.net:8081/dataset/population/population_old.csv"
    url_total = "http://py.mooctest.net:8081/dataset/population/population_total.csv"
    old_rows = csv.reader(urllib.urlopen(url_old))
    total_rows = csv.reader(urllib.urlopen(url_total))

    # Leading rows of each table are skipped before the numeric column is read.
    old_num = [int(rec[1]) for pos, rec in enumerate(old_rows) if pos >= 3]
    total_num = [int(rec[4]) for pos, rec in enumerate(total_rows) if pos >= 5]

    # The i-1 indexing is kept as written: index -1 wraps to the last element,
    # so the percentages come out rotated by one position.
    old_rate = [100.0 * old_num[k - 1] / total_num[k - 1] for k in range(len(old_num))]

    rates = pd.Series(old_rate)
    x = rates.mean()
    std = rates.std()
    var = rates.var()  # var = s^2

    z = t.isf(0.05, 31)
    half_width = std / math.sqrt(31) * z
    mean_lower = x - half_width
    mean_upper = x + half_width

    scaled_var = 31 * var
    std_lower = scaled_var / chi2.isf(0.05, 31)
    std_upper = scaled_var / chi2.isf(0.95, 31)

    result = [[mean_lower, mean_upper], [std_lower, std_upper]]
    print(result)
    return result
def run(self, infile, label, outfile, existing=None):
    """Fold the true/predicted columns of ``infile`` into a combined CSV.

    Args:
        infile: CSV with a header row followed by rows of exactly three
            columns: file name, true value, predicted value.
        label: passed through to ``_combine_values`` for every value.
        outfile: path of the CSV to write; it gets the header
            ``file_name,true,predicted``.
        existing: optional path of a CSV whose rows (file name followed by
            the values collected so far) seed the result.

    Raises:
        Exception: if ``existing`` is given and a file name from ``infile``
            is not present in it.
    """
    existing_rows = {}
    if existing:
        # The 'rU' mode was removed in Python 3.11; newline='' is the mode
        # the csv module expects.
        # NOTE(review): every row of ``existing`` is loaded, including a
        # header row if the file has one - confirm that is intended.
        with open(existing, 'r', newline='') as csvfile:
            existing_rows = {row[0]: row[1:] for row in csv.reader(csvfile)}

    with open(infile, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile)
        # Ignore the header
        next(reader)
        for file_name, true_val, pred_val in reader:
            if existing and file_name not in existing_rows:
                raise Exception(
                    "The file name ({0}) doesn't exist in the existing file.".format(file_name))
            if not existing:
                # Without a seed file each name starts from two empty values.
                existing_rows[file_name] = ['', '']
            existing_rows[file_name] = (
                _combine_values(existing_rows[file_name][0], true_val, label),
                _combine_values(existing_rows[file_name][1], pred_val, label)
            )

    with open(outfile, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['file_name', 'true', 'predicted'])
        for k, v in existing_rows.items():
            writer.writerow((k, *v))
def foreignCards(self):
    """Parse ``self.data`` as delimited text and build one card per valid row.

    ``self.sniff()`` is called first.  Rows are split with ``self.delimiter``
    when it is set, otherwise with ``self.dialect``.  Rows whose field count
    differs from ``self.numFields`` are skipped.

    Returns:
        list: the values returned by ``self.cardFromFields`` for every
        accepted row, in input order.  (The original built this list but
        never returned it.)

    Raises:
        ImportFormatError: if a byte-string field is not valid UTF-8.
    """
    self.sniff()
    # process all lines
    # NOTE(review): ``log`` and ``ignored`` are collected but not exposed by
    # the visible code - confirm whether they should be stored on ``self``.
    log = []
    cards = []
    ignored = 0
    if self.delimiter:
        reader = csv.reader(self.data, delimiter=self.delimiter)
    else:
        reader = csv.reader(self.data, self.dialect)
    for row in reader:
        try:
            # Byte strings (Python 2 csv output) are decoded; text passes
            # through unchanged, so this works on Python 2 and 3.
            row = [x.decode("utf-8") if isinstance(x, bytes) else x for x in row]
        except UnicodeDecodeError:
            raise ImportFormatError(
                type="encodingError",
                info=_("The file was not in UTF8 format."))
        if len(row) != self.numFields:
            log.append(_(
                "'%(row)s' had %(num1)d fields, "
                "expected %(num2)d") % {
                "row": u" ".join(row),
                "num1": len(row),
                "num2": self.numFields,
                })
            ignored += 1
            continue
        cards.append(self.cardFromFields(row))
    return cards
def i_check_anomaly_scores(step, check_file):
    """Assert that the generated scores file matches the reference file.

    ``world.output`` is compared row by row with ``res_filename(check_file)``.
    A cell is compared numerically when the produced cell contains a dot
    after position 0, or the reference cell contains one and ends in ``.0``;
    both values are then rounded to the precision of the shorter string.
    All other cells are compared as strings.

    Any exception, including a failed comparison, is re-raised as an
    AssertionError carrying the original message.
    """
    check_file = res_filename(check_file)
    predictions_file = world.output
    try:
        # Both handles are closed on exit; the original left them open.
        with open(predictions_file, newline="") as predictions_handle, \
                open(check_file, newline="") as check_handle:
            predictions = csv.reader(predictions_handle, lineterminator="\n")
            expected = csv.reader(check_handle, lineterminator="\n")
            for row in predictions:
                check_row = next(expected)
                if len(check_row) != len(row):
                    assert False, "row length mismatch: %s != %s" % (row, check_row)
                for index in range(len(row)):
                    dot = row[index].find(".")
                    if dot > 0 or (check_row[index].find(".") > 0
                                   and check_row[index].endswith(".0")):
                        try:
                            decimal_places = min(len(row[index]), len(check_row[index])) - dot - 1
                            row[index] = round(float(row[index]), decimal_places)
                            check_row[index] = round(float(check_row[index]), decimal_places)
                        except ValueError:
                            # Not numeric after all: fall back to comparing
                            # the raw strings below.
                            pass
                    if check_row[index] != row[index]:
                        print(row, check_row)
                        assert False, "row mismatch: %s != %s" % (row, check_row)
    except Exception as exc:
        assert False, str(exc)
def get_fluence(e_0=100.0):
    """
    Build an interpolator for the tabulated electron fluence.

    The table used is the one whose file name (an integer energy) is closest
    to ``e_0`` among the ``*.csv`` files in the ``fluence`` data directory.

    Args:
        e_0 (float): The kinetic energy whose CSDA range is used to scale the distances.

    Returns:
        A function representing fluence(x,u) with x in CSDA units.
    """
    def _floats(record):
        # Each record holds one comma-separated line in its first field.
        return [float(token) for token in record[0].split(",")]

    # Energies for which a table exists (file stems made only of digits).
    stems = [os.path.split(name)[1].split(".csv")[0]
             for name in glob(os.path.join(data_path, "fluence", "*.csv"))]
    available = sorted(int(stem) for stem in stems if stem.isdigit())
    e_closest = min(available, key=lambda energy: abs(energy - e_0))

    # First two lines of grid.csv: the x axis, then the u axis.
    with open(os.path.join(data_path, "fluence/grid.csv"), 'r') as grid_file:
        grid_rows = csv.reader(grid_file, delimiter=' ', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        x = np.array(_floats(next(grid_rows)))
        u = np.array(_floats(next(grid_rows)))

    table_path = os.path.join(data_path, "fluence", str(e_closest) + ".csv")
    with open(table_path, 'r') as table_file:
        table_rows = csv.reader(table_file, delimiter=' ', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        t = np.array([_floats(record) for record in table_rows])

    # Note f is returning numpy 1x1 arrays
    return interpolate.RectBivariateSpline(x, u, t, kx=1, ky=1)
def get_cs(e_0=100, z=74):
    """
    Build the scaled bremsstrahlung cross-section function for one element.

    Args:
        e_0 (float): The electron kinetic energy, used to scale u=e_e/e_0.
        z (int): Atomic number of the material.

    Returns:
        A function representing cross_section(e_g,u) in mb/keV, with e_g in keV.
    """
    # NOTE: Data is given for E0>1keV. CS values below this level should be used with caution.
    # The default behaviour is to keep it constant
    def _floats(record):
        # Each record holds one comma-separated line in its first field.
        return [float(token) for token in record[0].split(",")]

    # First two lines of grid.csv: electron energies, then the k axis.
    with open(os.path.join(data_path, "cs/grid.csv"), 'r') as grid_file:
        grid_rows = csv.reader(grid_file, delimiter=' ', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        e_e = np.array(_floats(next(grid_rows)))
        log_e_e = np.log10(e_e)
        k = np.array(_floats(next(grid_rows)))

    with open(os.path.join(data_path, "cs/%d.csv" % z), 'r') as table_file:
        table_rows = csv.reader(table_file, delimiter=' ', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        t = np.array([_floats(record) for record in table_rows])

    scaled = interpolate.RectBivariateSpline(log_e_e, k, t, kx=3, ky=1)
    m_electron = 511
    z2 = z * z

    def cross_section(e_g, u):
        e_kin = u * e_0
        return (e_kin + m_electron) ** 2 * z2 / (e_kin * e_g * (e_kin + 2 * m_electron)) * (
            scaled(np.log10(e_kin), e_g / e_kin))

    return cross_section
def load_csv(filename):
    """
    Load a csv file into a data set.

    :param filename: path of a csv file, or an already opened text file
        object (anything with a ``read`` attribute), laid out as
        attr0,attr1,attr2,attr3,class1
        attr0,attr1,attr2,attr3,class1
        ...
    :return data_set: a list of instances, each a list of attribute strings
        data_set = [
            ['attr0','attr1','attr2','attr3','class1'],
            ['attr0','attr1','attr2','attr3','class2'],
            ...
        ]
    """
    # The Python 2 ``file`` builtin no longer exists, so file objects are
    # recognised by duck typing instead.
    if hasattr(filename, "read"):
        return list(csv.reader(filename))
    # Text mode with newline='' is what csv expects on Python 3; the handle
    # is closed as soon as the rows are read.
    with open(filename, "r", newline="") as handle:
        return list(csv.reader(handle))
def _list_outputs(self, test_output, expected_output):
    """
    Read the test output and the expected output as lists of CSV rows,
    to be compared in the test.

    Parameters:
    - test_output: file name of the application's output
    - expected_output: file name of the expected output

    Output:
    - test_list: the rows of the test output, as a list of lists
    - expected_list: the rows of the expected output, as a list of lists
    """
    # ``with`` closes both files even if parsing one of them raises.
    with open(test_output, 'r') as test_file, open(expected_output, 'r') as expected_file:
        test_list = list(csv.reader(test_file))
        expected_list = list(csv.reader(expected_file))
    return test_list, expected_list
def main(pre_processed, genome_sam):
    """Print the rows of ``pre_processed`` that pass both read filters.

    A row is printed (tab separated, unchanged) when its read name

    * does not appear in ``genome_sam`` with ``"0"`` or ``"16"`` in the
      second column, and
    * is associated with exactly one splice-junction id across the whole of
      ``pre_processed`` (the part of the tag column before the first ``|``).

    Args:
        pre_processed: tab-separated file with exactly seven columns per
            row: read, flag, tag, start, cigar, seq, qual.
        genome_sam: tab-separated file; only columns 1 and 2 are used.
    """
    black_list = set()
    with open(genome_sam) as sam_file:
        for row in csv.reader(sam_file, delimiter='\t'):
            if row[1] in ("0", "16"):
                black_list.add(row[0])

    # First pass: collect the junction ids seen for every read.
    read_SJ = defaultdict(set)
    with open(pre_processed) as handle:
        for row in csv.reader(handle, delimiter='\t'):
            read, flag, tag, start, cigar, seq, qual = row
            read_SJ[read].add(tag.split("|")[0])

    # Second pass: the file is re-read rather than kept in memory.
    with open(pre_processed) as handle:
        for row in csv.reader(handle, delimiter='\t'):
            read = row[0]
            if read not in black_list and len(read_SJ[read]) == 1:
                print("\t".join(row))
def main():
    '''
    Copy selected columns of a CSV file into a new CSV file.

    Script is meant to be called from terminal command line. Look at help
    for more information on the inputs.  Columns given with ``-c`` may be
    zero-based indices or header titles and are written in the order given.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', help = 'Input file name ending in .csv')
    parser.add_argument('-o', '--output', help ='Output file name ending in .csv')
    parser.add_argument('-c', '--columns', action='store', nargs = '*',
                        help = 'Column index OR title. The output columns will be in the same order as the user input.')
    args = parser.parse_args()

    with open(args.output, 'w', newline='') as output_file:
        w = csv.writer(output_file)
        with open(args.input, 'r', newline='') as input_file:
            reader = csv.reader(input_file)
            header = next(reader)
            # A real list: the original ``map`` object was consumed by the
            # header row on Python 3, leaving every data row empty.
            included_cols = [int(n) if n.isdigit() else header.index(n)
                             for n in args.columns]
            w.writerow([header[i] for i in included_cols])
            # The same reader continues with the data rows.
            for row in reader:
                w.writerow([row[i] for i in included_cols])
def dataImport():
    """Load the marketing train/test CSVs, encode them and run the learners.

    The rows are stored as numeric numpy arrays in the module globals
    ``trainData`` and ``testData``; the ten categorical columns listed below
    are label-encoded first.  ``learnDecisionTree``, ``learnRF``,
    ``learnSVM`` and ``learnLogReg`` are then called in that order.
    """
    global testData
    global trainData
    # Text mode with newline='' is what csv needs on Python 3; the handles
    # are closed as soon as the rows are read.
    with open('/Users/annu/Desktop/Deposit_Marketing/Training.csv', 'r', newline='') as marketingTrain:
        trainData = numpy.array(list(csv.reader(marketingTrain, delimiter = ',')))
    with open('/Users/annu/Desktop/Deposit_Marketing/Testing.csv', 'r', newline='') as marketingTest:
        testData = numpy.array(list(csv.reader(marketingTest, delimiter = ',')))

    # Label encoding of categorical data.  The encoder is fitted once per
    # column on train and test values together, so a category maps to the
    # same integer in both sets (fitting them separately did not guarantee
    # that).
    encoder = preprocessing.LabelEncoder()
    for j in (1,2,3,4,6,7,8,10,14,15):
        encoder.fit(numpy.concatenate((trainData[:,j], testData[:,j])))
        trainData[:,j] = encoder.transform(trainData[:,j])
        testData[:,j] = encoder.transform(testData[:,j])

    # Converting numpy strings to floats.  ``numpy.float`` was only an alias
    # of the builtin ``float`` and has been removed from numpy.
    trainData = trainData.astype(float)
    testData = testData.astype(float)

    learnDecisionTree() #Good with handling categorical attributes
    learnRF() #Simple Ensemble classifier, must try
    learnSVM() #Tested
    learnLogReg() #Tested
def process(self, elevation=False, plot=False):
    """Load time stations and route waypoints, then plot or export them.

    Every row after the first line of ``self.timestation_file`` becomes a
    ``self.TimeStation`` stored in ``self.timestations`` under its
    ``number``.  Every row after the first line of ``self.route_file`` is
    handed to ``self.add_route_waypoint_to_timestation``.

    Args:
        elevation: when true, ``download_elevation()`` is called on every
            time station.
        plot: when true, ``plot_grade()`` is called on every time station;
            otherwise ``write_csv()`` is.
    """
    route_reader = csv.reader(self.route_file)
    timestation_reader = csv.reader(self.timestation_file)

    for row in timestation_reader:
        if timestation_reader.line_num == 1:
            continue  # first line is skipped
        waypoint = GpsWaypoint(lat=float(row[0]), lon=float(row[1]), name=row[2])
        timestation = self.TimeStation(waypoint)
        self.timestations[timestation.number] = timestation

    timestation_index = 0
    for row in route_reader:
        if route_reader.line_num == 1:
            continue  # first line is skipped
        waypoint = GpsWaypoint(lat=float(row[0]), lon=float(row[1]), name=row[2])
        timestation_index = self.add_route_waypoint_to_timestation(waypoint, timestation_index)

    # ``values()`` exists on Python 2 and 3; ``iteritems()`` is Python 2 only
    # and the keys were never used.
    if elevation:
        for timestation in self.timestations.values():
            timestation.download_elevation()
    if plot:
        for timestation in self.timestations.values():
            timestation.plot_grade()
    else:
        for timestation in self.timestations.values():
            timestation.write_csv()
def featureMatching(input_keypoint_list, input_xy_list, input_img, template_img):
    """Mark exactly matching keypoints on a side-by-side image.

    Template descriptors and coordinates are read from
    ``template_keypoint_binary.csv`` and ``template_keypoint_xy.csv``.  Every
    template descriptor is compared with every input descriptor; when the
    element-wise difference is all zeros, a filled circle is drawn at the
    input coordinate and at the template coordinate (shifted right by the
    input image width).  The result is written to ``sum_img.tif``.

    Args:
        input_keypoint_list: input descriptors; each must support
            ``list - descriptor`` (presumably numpy arrays - confirm).
        input_xy_list: coordinate pairs for the input keypoints; index 1 is
            used as x and index 0 as y.
        input_img: input image.
        template_img: template image, stacked to the right of ``input_img``.
    """
    # Load the template's keypoint_binary and xy_list.  Real lists are
    # built because a lazy ``map`` object cannot be subtracted on Python 3.
    with open('template_keypoint_binary.csv', 'r') as handle:
        template_keypoint_list = [[int(v) for v in temp_key] for temp_key in csv.reader(handle)]
    with open('template_keypoint_xy.csv', 'r') as handle:
        template_xy_list = [[int(v) for v in temp_xy] for temp_xy in csv.reader(handle)]

    # The original allocated a zero image here and immediately replaced it.
    sum_img = np.hstack((input_img, template_img))
    input_width = len(input_img[0])

    # Take the difference of the descriptors; all zeros counts as a match.
    for temp_y, template_bits in enumerate(template_keypoint_list):
        for inp_y, input_bits in enumerate(input_keypoint_list):
            calc_list = template_bits - input_bits
            if all(x == 0 for x in calc_list):
                # Map the x,y values stored at the same index as the matched
                # descriptors onto the image plane and draw the result.
                print('get match')
                temp_xy = template_xy_list[temp_y]
                inp_xy = input_xy_list[inp_y]
                add_input = (inp_xy[1], inp_xy[0])
                add_template = (temp_xy[1] + input_width, temp_xy[0])
                cv2.circle(sum_img, add_input, 3, (0,0,0), -1)
                cv2.circle(sum_img, add_template, 3, (0,0,255), -1)
    print('quit calc')
    cv2.imwrite('sum_img.tif', sum_img)
def getScore(paramter):
    """Print the label rows of the highest-scoring records.

    Every row of ``extration.csv`` is scored with ``calScore(row, paramter)``.
    The rows of ``label.csv`` at the same positions are then printed in
    descending score order as ``first,second``, up to 50000 of them.

    Args:
        paramter: passed unchanged as the second argument of ``calScore``.
    """
    # ``open`` replaces the Python 2-only ``file`` builtin; both handles are
    # closed once the data is read.
    with open('extration.csv') as extration_file, open('label.csv') as lable_file:
        scores = [calScore(w, paramter) for w in csv.reader(extration_file)]
        buy = list(csv.reader(lable_file))

    argscores = np.argsort(-np.array(scores))
    # Slicing stops at the end of the data, so fewer than 50000 rows no
    # longer ends in an IndexError.
    for idx in argscores[:50000]:
        print(buy[idx][0] + ',' + buy[idx][1])
def load_dataset(category):
    """Load the feature matrix and the scores of one data split.

    Args:
        category: ``"train"`` or ``"valid"``.  Any other value leaves both
            paths empty, so opening them fails.

    Returns:
        Bunch with ``data`` (n_samples x n_features array, filled from every
        column but the first of each feature row), ``score`` (third column
        of the score file), ``n_samples`` (from ``get_num_lines`` on the
        score file), ``n_features`` and ``feature_names`` (header of the
        feature file without its first column).
    """
    feature_file = ""
    score_file = ""
    if category == "train":
        feature_file = "../data/features/all_train.txt"
        score_file = "../data/new_score/new_train.tsv"
    elif category == "valid":
        feature_file = "../data/features/all_valid.txt"
        score_file = "../data/new_score/new_valid.tsv"

    # Both files are closed when the block exits; the original leaked them.
    with open(feature_file, 'r') as feature_handle, open(score_file, 'r') as score_handle:
        features = csv.reader(feature_handle, delimiter = '\t')
        scores = csv.reader(score_handle, delimiter = '\t')
        feature_names = next(features)[1:]
        n_samples = get_num_lines(score_file)
        n_features = len(feature_names)
        data = np.empty((n_samples, n_features))
        score = np.empty((n_samples,))
        for i, row in enumerate(features):
            data[i] = np.asarray(row[1:])
        for i, row in enumerate(scores):
            score[i] = row[2]

    return Bunch(data = data, score = score, n_samples = n_samples,
                 n_features = n_features, feature_names = feature_names)
def getReader(self, file, dialect=False, delimiter=None, quotechar=None, skiprows=-1):
    """Return ``(reader, dialect)`` for ``file``, sniffing the dialect if needed.

    When ``dialect`` is ``False`` a sample is read from ``file`` (the loop
    stops once more than ``500 + skiprows`` lines are collected), the first
    ``skiprows`` lines are dropped when that value is positive, and the last
    20 remaining lines are given to ``csv.Sniffer``.  The sniffed dialect is
    then forced to ``doublequote`` and ``skipinitialspace``; ``quotechar``
    (if not ``None``) either disables quoting (``''``) or sets the quote
    character.  The file is rewound before the reader is created.

    When a dialect is passed in, it is used as is and the file is not read
    or rewound.
    """
    if dialect is False:
        # Collect the sample the sniffer will look at.
        sample = []
        for text_line in file:
            sample.append(text_line)
            if len(sample) > 500 + skiprows:
                break
        if skiprows > 0:
            sample = sample[skiprows:]
        tail = sample[-20:]

        dialect = csv.Sniffer().sniff("\n".join(tail), delimiters=delimiter)
        dialect.doublequote = True
        dialect.skipinitialspace = True
        if quotechar is not None:
            if quotechar == '':
                dialect.quoting = csv.QUOTE_NONE
            else:
                dialect.quotechar = quotechar
                dialect.quoting = csv.QUOTE_MINIMAL
        file.seek(0)

    return (csv.reader(file, dialect), dialect)
def parse_scores(input, phevor2):
    """Parse either .simple or .phevor output into a dictionary of scores by gene.

    With ``phevor2 == False`` rows starting with ``RANK`` are skipped and the
    score is ``-log10`` of the third column.  With ``phevor2 == True`` rows
    whose first column contains ``#`` are skipped and the third column is the
    score itself.  The gene name is the stripped second column.

    Returns:
        tuple: ``(scores, highs)`` where ``highs`` is the largest score seen
        (0 if none exceeded it).
    """
    scores = {}
    highs = 0

    # One (skip-test, score-conversion) pair per file format.
    passes = []
    if phevor2 == False:
        passes.append((lambda first: first == "RANK",
                       lambda raw: -np.log10(float(raw))))
    if phevor2 == True:
        passes.append((lambda first: "#" in first, float))

    for is_skipped, to_score in passes:
        with open(input) as handle:
            for record in csv.reader(handle, delimiter="\t"):
                if is_skipped(record[0]):
                    continue
                value = to_score(record[2])
                scores[record[1].strip()] = value
                if value > highs:
                    highs = value
    return scores, highs
def read_csv_data(train_name, test_name):
    """Read a training and a test CSV of numbers into features and targets.

    In both files every cell is converted to ``float``; the first column is
    the target and the remaining columns are the features.

    Returns:
        tuple: ``(Train_X, Train_Y, Test_X, Test_Y)`` as plain lists.
    """
    def _split(path):
        # Returns (features, targets) for one file.
        with open(path, 'r') as csvfile:
            numeric = [[float(cell) for cell in record] for record in csv.reader(csvfile)]
        return [values[1:] for values in numeric], [values[0] for values in numeric]

    Train_X, Train_Y = _split(train_name)
    Test_X, Test_Y = _split(test_name)
    return Train_X, Train_Y, Test_X, Test_Y
def testAll(IsReferenceBuild = False, UpdateSVN = False):
    """Validate the project map file and run ``testProject`` on every entry.

    Every row of the file at ``getProjectMapPath()`` must have exactly two
    entries, the second being ``"0"``, ``"1"`` or ``"2"``.  The whole file is
    validated before any project is tested.

    Args:
        IsReferenceBuild: forwarded to ``testProject``.
        UpdateSVN: when true, ``updateSVN("delete", ...)`` is called before
            the projects are tested and ``updateSVN("add", ...)`` afterwards;
            requires ``IsReferenceBuild``.

    Raises:
        Exception: on an invalid row; any error is re-raised after a message
            is printed.
    """
    # Text mode with newline='' is what csv expects on Python 3.
    with open(getProjectMapPath(), "r", newline="") as PMapFile:
        try:
            # Validate the input.
            for I in csv.reader(PMapFile):
                if len(I) != 2:
                    # The message used to say "3 entries", contradicting the
                    # check above.
                    print("Error: Rows in the ProjectMapFile should have 2 entries.")
                    raise Exception()
                if I[1] not in ("0", "1", "2"):
                    print("Error: Second entry in the ProjectMapFile should be 0"
                          " (single file), 1 (project), or 2(single file c++11).")
                    raise Exception()

            # When we are regenerating the reference results, we might need to
            # update svn. Remove reference results from SVN.
            if UpdateSVN:
                assert IsReferenceBuild
                updateSVN("delete", PMapFile)

            # Test the projects.
            PMapFile.seek(0)
            for I in csv.reader(PMapFile):
                testProject(I[0], int(I[1]), IsReferenceBuild)

            # Add reference results to SVN.
            if UpdateSVN:
                updateSVN("add", PMapFile)

        except BaseException:
            # Report and re-raise everything, as the original bare except did.
            print("Error occurred. Premature termination.")
            raise
def _batch_means(file_prefix, keep_row):
    """Return, for each of the BATCH files, the mean of column 1 over kept rows."""
    means = []
    for current in range(1, BATCH + 1):
        responseFile = file_prefix + str(current) + '.csv'
        with open('../data/' + responseFile) as handle:
            values = [float(row[1])
                      for row in csv.reader(handle, delimiter = ',')
                      if row[0] != 'time' and keep_row(row)]
        # A file with no usable row raises ZeroDivisionError, as before.
        means.append(sum(values) / len(values))
    return means


def _print_estimates(x_bar):
    """Print the mean, sample variance and 1.96-sigma interval of the batch means."""
    mu = 0
    for x_i in x_bar:
        mu += x_i / BATCH
    print('Stima puntuale della media: ' + str(mu))

    sigma2 = 0
    for x_i in x_bar:
        sigma2 += (x_i - mu)**2
    sigma2 /= (BATCH - 1)
    print('Varianza campionaria: ' + str(sigma2))

    a = mu - 1.96 * (sqrt(sigma2) / sqrt(BATCH))
    b = mu + 1.96 * (sqrt(sigma2) / sqrt(BATCH))
    print('Intervallo di confidenza: (' + str(a) + ', ' + str(b) + ')')


def main(args):
    """Print batch-means estimates for the response time and presence factor.

    For each quantity the files ``../data/360response-<k>.csv`` and
    ``../data/360presence-<k>.csv`` (k = 1..BATCH) are read, rows whose first
    column is ``time`` are skipped, and column 1 is averaged per file.
    Response-time rows are only used when column 0 lies strictly between
    20.0 and 180.0.

    Args:
        args: unused.
    """
    # Compute mean
    print('\n-----------------------------')
    print('(360) Tempo di risposta del server.')
    print('-----------------------------')
    _print_estimates(_batch_means('360response-', lambda row: 20.0 < float(row[0]) < 180.0))

    # Compute mean.  Each section now gets its own list of batch means; the
    # original kept appending to the response-time list, which mixed both
    # quantities into the presence-factor estimate.
    print('\n-----------------------------')
    print('(360) Presence factor.')
    print('-----------------------------')
    _print_estimates(_batch_means('360presence-', lambda row: True))
def combineAll(initialFile, timeFile, outputFile):
    """Join two id-sorted CSV files on their first column.

    Both inputs are walked in parallel.  Whenever the ids in column 0 match,
    one row is written consisting of the whole ``timeFile`` row followed by
    the ``initialFile`` row without its id.  Rows whose id has no partner are
    dropped.  Ids are compared as strings, so both files must be sorted in
    string order.

    Reading stops cleanly when either file is exhausted; the original raised
    StopIteration if a file ended while the ids were still out of step.

    Args:
        initialFile: path of the first input CSV.
        timeFile: path of the second input CSV.
        outputFile: path of the CSV to write (``\\n`` line endings).
    """
    with open(initialFile, "r", newline="") as initials, \
            open(timeFile, "r", newline="") as times, \
            open(outputFile, "w", newline="") as output:
        initialReader = csv.reader(initials, delimiter=",")
        timeReader = csv.reader(times, delimiter=",")
        writer = csv.writer(output, lineterminator="\n")

        line1 = next(initialReader, None)
        line2 = next(timeReader, None)
        while line1 is not None and line2 is not None:
            id1 = line1[0]
            id2 = line2[0]
            if id1 == id2:
                writer.writerow(line2 + line1[1:])
                line1 = next(initialReader, None)
                line2 = next(timeReader, None)
            elif id1 > id2:
                # The second file is behind: advance it.
                line2 = next(timeReader, None)
            else:
                line1 = next(initialReader, None)
def merge_csv(first_csv_path, second_csv_path, result_csv_path, delimiter="|"):
    """Merge two csv data files into one, summing the counts.

    All files share the same layout:
        obj | param | count
    Rows with the same ``(obj, param)`` pair are collapsed into a single
    output row whose count is the sum of all their counts.

    Args:
        first_csv_path: path of the first input file.
        second_csv_path: path of the second input file.
        result_csv_path: path of the file to write.
        delimiter: field separator used by all three files.
    """
    res = {}
    # Both inputs are read the same way; ``with`` closes each file even when
    # a malformed row raises.
    for source_path in (first_csv_path, second_csv_path):
        with open(source_path, "r") as source:
            for row in csv.reader(source, delimiter=delimiter):
                key = (row[0], row[1])
                res[key] = res.get(key, 0) + int(row[2])

    with open(result_csv_path, "w") as result_csv:
        writer = csv.writer(result_csv, delimiter=delimiter)
        for key, value in res.items():
            writer.writerow([key[0], key[1], value])
def main(): """main function. Args: parsers (inst): Parser class intance for terminal input. Returns: None Examples: >>> main() >>> The URL you\'ve submitted is INVALID, enter VALID URL. """ parser = argparse.ArgumentParser() parser.add_argument('-u', '--file', help='Enter URL Link to CSV File') parser.add_argument('-c', '--servers', help='Enter number of Servers') args = parser.parse_args() try: if args.file: grab_file = urllib2.urlopen(args.file) read_file = csv.reader(grab_file) for row in read_file: simulate_one_server(int(row[0]), int(row[2])) elif args.servers: grab_file = urllib2.urlopen('http://s3.amazonaws.com/cuny-is211-spring2015/requests.csv') read_file = csv.reader(grab_file) simulate_many_servers(read_file, args.servers) else: print 'Invalid attempt, not a server, not a url' except urllib2.URLError as url_err: print 'The URL you\'ve submitted is INVALID, enter VALID URL' raise url_err
def main(opt):
    """Copy only the columns of a TSV file that hold more than one value.

    The first line of ``opt.inputFile`` is treated as the header.  A column
    is kept when some data row differs, in that column, from the first data
    row.  The kept columns of every line (header included) are written to
    ``opt.outputFile`` in their original left-to-right order.

    Args:
        opt: object with ``inputFile`` and ``outputFile`` path attributes.

    Returns:
        int: always 0.
    """
    # Look for more than one value for each attribute
    varying = set()  # column indices that have more than one value
    with open(opt.inputFile, 'r', newline='') as fin:
        reader = csv.reader(fin, delimiter='\t')
        next(reader)  # the header
        last = next(reader)  # the first data row, used as the reference
        for row in reader:
            for j, val in enumerate(row):
                if j not in varying and val != last[j]:
                    varying.add(j)
    # Sorted so the output keeps the file's column order; the original
    # emitted columns in the order their variation was first noticed.
    keep = sorted(varying)
    print('Writing', len(keep), 'out of', len(last), 'attributes which had more than one value.')

    # Write out the columns we want to keep
    with open(opt.inputFile, 'r', newline='') as fin, \
            open(opt.outputFile, 'w', newline='') as fout:
        writer = csv.writer(fout, delimiter='\t')
        for row in csv.reader(fin, delimiter='\t'):
            writer.writerow([row[j] for j in keep])
    return 0
def __init__(self, fpath, newnames=None, delimiter=None, transpose=False):
    """Open ``fpath`` and prepare a row stream over it.

    Args:
        fpath: path of the delimited file; the handle stays open and is
            stored on ``self.f``.
        newnames: optional dict.  Values matching ``self.eval_re`` are
            replaced, in the caller's dict, by the result of
            ``eval_with_template`` on the first match group, with
            ``basename`` set to the file name without directory or extension.
        delimiter: field separator; when ``None`` the dialect is sniffed
            from the first 1024 bytes.
        transpose: when true the whole table is read, passed through
            ``transpose_table`` and kept on ``self.transposed``.
    """
    # NOTE(review): binary mode fits the Python 2 csv module; on Python 3
    # csv needs a text-mode handle - confirm the target interpreter.
    handle = open(fpath, "rb")
    if delimiter is not None:
        rows = csv.reader(handle, delimiter=delimiter)
    else:
        sniffed = csv.Sniffer().sniff(handle.read(1024))
        # dialect = csv.Sniffer().sniff(f.read(1024), ',:|\t')
        handle.seek(0)
        rows = csv.reader(handle, sniffed)

    transposed = None
    if transpose:
        transposed = transpose_table(list(rows))
        rows = iter(transposed)

    self.fpath = fpath
    if newnames is not None:
        #TODO: move this junk out of the class
        basename = os.path.splitext(os.path.basename(fpath))[0]
        for key in newnames:
            match = self.eval_re.match(newnames[key])
            if match:
                newnames[key] = eval_with_template(match.group(1), {'basename': basename})
    self.newnames = newnames
    self.transposed = transposed
    self.f = handle
    self.data_stream = rows
    self._fields = None
    self._field_names = None
    self._numlines = None
def main():
    """Read (x1, x2) pairs from ``traj.dat`` and print the current for each.

    ``traj.dat`` is a comma-separated file whose first two columns are
    converted to floats.  The row count, every raw row, the second x1 value
    (when there is one) and the result of ``computecurrent(x1, x2, gainx)``
    for every row are printed, with ``gainx`` a 2x1 zero array.
    """
    # load csv of x1,x2 values (read once; text mode is what csv expects on
    # Python 3)
    with open('traj.dat', 'r', newline='') as f:
        rows = list(csv.reader(f, delimiter = ','))
    row_count = len(rows)
    print(row_count)

    # initialize an array with the number of rows
    x1all = numpy.zeros((1, row_count))
    x2all = numpy.zeros((1, row_count))
    for idx, row in enumerate(rows):
        print(row)
        x1all[0, idx] = float(row[0])
        x2all[0, idx] = float(row[1])

    if row_count > 1:
        print(x1all[0, 1])

    # Iterate over the samples (second axis).  ``len(x1all)`` is 1 for a
    # (1, N) array, so the original only ever processed the first sample.
    for i in range(row_count):
        # compute I
        x1 = x1all[0, i]
        x2 = x2all[0, i]
        gainx = numpy.array([[0], [0]])
        I = computecurrent(x1, x2, gainx)
        print(I)
def combineTimes(acceptedTimeFile, earliestTimeFile, outputFile):
    """Join the accepted and earliest time files on their id column.

    Both inputs are walked in parallel.  Whenever the ids in column 0 match,
    the row ``id, accepted, earliest`` is written, taking column 1 of each
    file.  Rows whose id has no partner are dropped.  Ids are compared as
    strings, so both files must be sorted in string order.

    Reading stops cleanly when either file is exhausted; the original raised
    StopIteration if a file ended while the ids were still out of step.

    Args:
        acceptedTimeFile: path of the CSV providing the ``accepted`` column.
        earliestTimeFile: path of the CSV providing the ``earliest`` column.
        outputFile: path of the CSV to write (``\\n`` line endings).
    """
    with open(acceptedTimeFile, "r", newline="") as aF, \
            open(earliestTimeFile, "r", newline="") as eF, \
            open(outputFile, "w", newline="") as output:
        acceptedReader = csv.reader(aF, delimiter=",")
        earliestReader = csv.reader(eF, delimiter=",")
        writer = csv.writer(output, lineterminator="\n")

        line1 = next(acceptedReader, None)
        line2 = next(earliestReader, None)
        while line1 is not None and line2 is not None:
            id1 = line1[0]
            id2 = line2[0]
            if id1 == id2:
                writer.writerow([id1, line1[1], line2[1]])
                line1 = next(acceptedReader, None)
                line2 = next(earliestReader, None)
            elif id1 > id2:
                # The earliest-time file is behind: advance it.
                line2 = next(earliestReader, None)
            else:
                line1 = next(acceptedReader, None)
def updateDelimiter(self):
    """Work out how ``self.data`` is delimited and count its fields.

    When ``self.delimiter`` is unset, a dialect is sniffed from the first ten
    lines and, failing that, from the first line alone.  If no dialect is
    found either, the delimiter falls back to the first of tab, ``;`` and
    ``,`` present in the first line, or a space.  ``self.numFields`` is set
    to the number of fields in the first row.

    Raises:
        ImportFormatError: if the first row cannot be read.
    """
    self.dialect = None
    if not self.delimiter:
        # Sniffing is best effort: any failure just means falling through to
        # the next strategy.
        try:
            self.dialect = csv.Sniffer().sniff("\n".join(self.data[:10]))
        except Exception:
            try:
                self.dialect = csv.Sniffer().sniff(self.data[0])
            except Exception:
                pass
    if self.dialect:
        reader = csv.reader(self.data, self.dialect)
    else:
        if not self.delimiter:
            if "\t" in self.data[0]:
                self.delimiter = "\t"
            elif ";" in self.data[0]:
                self.delimiter = ";"
            elif "," in self.data[0]:
                self.delimiter = ","
            else:
                self.delimiter = " "
        reader = csv.reader(self.data, delimiter=self.delimiter)
    try:
        # ``next(reader)`` works on Python 2 and 3.  ``reader.next()`` does
        # not exist on Python 3, which made this always raise there.
        self.numFields = len(next(reader))
    except Exception:
        raise ImportFormatError(
            type="encodingError",
            info=_("File is not encoded in UTF-8"))
def test_build(self):
    """Build a TAPModel from the edge and distribution data files.

    The edge file is read as space-separated rows; columns 1 and 2 become
    the integer end points of an edge and column 3 its float weight.  The
    distribution file is read the same way; the cells between the first and
    last column of each non-empty row become a float array stored under a
    running index, when that index is a node of the graph.
    """
    # NOTE(review): absolute paths on one machine - this can only run where
    # these files exist.
    edgepath = '/Users/erickpeirson/Downloads/Topic-Affinity-Propagation/edge.txt'
    distpath = '/Users/erickpeirson/Downloads/Topic-Affinity-Propagation/distribution.txt'
    graph = nx.Graph()

    # Load edge data into Graph.
    with open(edgepath, 'r') as f:
        reader = csv.reader(f, delimiter=' ')
        for line in reader:
            try:
                graph.add_edge(int(line[1]), int(line[2]), weight=float(line[3]))
            # NOTE(review): the bare except silently skips every row that
            # fails for any reason, not just short or non-numeric ones.
            except:
                pass
    # Identity mapping of nodes; not used again in this function.
    authors = { n:n for n in graph.nodes() }

    # Load dist data into atheta.
    atheta = {}
    with open(distpath, 'r') as f:
        reader = csv.reader(f, delimiter=' ')
        i = 0
        for line in reader:
            # Drop the first and last cell of the row.
            data = line[1:-1]
            if len(data) > 0:
                if i in graph.nodes():
                    atheta[i] = np.array([ float(d) for d in data ])
                i += 1

    # Estimate params.
    tapmodel = TAPModel(graph, atheta)
    tapmodel.build()
import csv

# Report the tags in column 10 of fname.csv that do not occur in column 10
# of dname.csv: each one is printed and appended to demo1.txt, and the total
# is printed at the end.

fields = []
rows = []
with open('dname.csv', 'r') as t1, open('fname.csv', 'r') as t2:
    fileone = t1.readlines()
    filetwo = t2.readlines()

csvreader = csv.reader(fileone)
csvreader2 = csv.reader(filetwo)
# fields = next(csvreader)
array = []
array2 = []
for row in csvreader:
    rows.append(row)
    array.append(int(row[10]))
# Set copy of ``array`` so each membership test is O(1) instead of a scan.
known_tags = set(array)

nt = []
# done.csv is still created (and truncated) as before, although nothing is
# written to it.
with open('done.csv', 'w') as csvfile:
    # creating a csv writer object
    csvwriter = csv.writer(csvfile)
    # ``with`` closes demo1.txt; the original never closed it.
    with open("demo1.txt", "a") as f:
        c = 0
        for ro in csvreader2:
            c = c + 1
            # Strip the ="..." wrapper around the number before parsing it.
            tag = ro[10].replace('="', '')
            tag = tag.replace('"', '')
            tag = int(tag)
            if tag not in known_tags:
                print(tag)
                f.write(str(tag) + "\n")
                nt.append(tag)
print(len(nt))
# -*- coding: utf-8 -*- """ Created on Sat Feb 6 17:51:23 2021 @author: athir """ import csv import random edited_data = [] with open('speech_blocks_objects.txt') as speechblocks: speechblocks_objects = speechblocks.read().splitlines() with open('train100k.txt') as training_data: data_reader = csv.reader(training_data, delimiter='\t') for line in data_reader: if line[0] == 'CapableOf': new_line = [] new_line.append(line[0]) new_line.append(line[1]) max_scene_length = random.randint(0, 5) object_list = [] for i in speechblocks_objects: if i + ' ' in line[2] or line[2].endswith(i): object_list.append(i) while(len(object_list) < max_scene_length): random_number = random.randint(0, 733)
def main(method):
    """Call ``implementDP`` once for every row of ``pitch-type.csv``.

    Args:
        method: forwarded unchanged as the ``method`` keyword argument; each
            CSV row (a list of strings) is passed as ``type``.
    """
    with open('pitch-type.csv', 'r') as handle:
        for pitch_row in csv.reader(handle):
            implementDP(type=pitch_row, method=method)
import cv2
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from keras.models import Sequential
from keras.layers import Flatten, Dense, Lambda, Cropping2D
from keras.layers import Convolution2D
from keras import backend as K

# Load the training data taken from the simulator in Training Mode
# NOTE(review): ``csv`` is used below but is not among the imports visible
# here - confirm it is imported.
samples = []
with open('./Data/driving_log.csv') as csvfile:
    reader = csv.reader(csvfile)
    for line in reader:
        samples.append(line)

# Split the data in training and validation sets (35% goes to validation)
train_samples, validation_samples = train_test_split(samples, test_size=0.35)

# Define correction factor
correction = 0.1

# Define generator with an extra argument to distinguish between training and prediction mode
# NOTE(review): the body of this function appears to be cut off after the
# first assignment.
def generator(samples, batch_size=32, mode="prediction"):
    # In case of training feed center, left and right camera images to the model
    if mode == "training":
        img_per_row = 3
def train_model(test_on, n_epochs, loss_train, patience, select_arti, corpus_to_train_on, batch_norma, filter_type,
                to_plot, lr, delta_test, config, speakers_to_train_on="", speakers_to_valid_on="", relearn=False):
    """Train the acoustic-to-articulatory model, then evaluate it on one test speaker.

    :param test_on: (str) name of the speaker to test on.
    :param n_epochs: (int) maximum number of epochs; early stopping usually ends training sooner.
    :param loss_train: (int) alpha of the combined loss, passed to ``criterion_both``
        (documented upstream as alpha*rmse/1000 + (1-alpha)*pearson, alpha in 0..100).
    :param patience: (int) number of successive non-improving validation evaluations tolerated
        by the early-stopping helper.
    :param select_arti: (bool) when true, predictions for articulators flagged "0" for the current
        category are forced to zero before the loss is computed.
    :param corpus_to_train_on: (str) bracketed, comma-separated list of corpora, e.g. "[a,b]".
    :param batch_norma: (bool) forwarded to the model constructor (batch-norm switch).
    :param filter_type: (int) forwarded to the model constructor (upstream doc: 0 = filter outside the
        network, 1 = inside with fixed weights, 2 = inside with trainable weights).
    :param to_plot: (bool) forwarded to ``model.evaluate_on_test``.
    :param lr: initial learning rate for Adam.
    :param delta_test: the validation set is evaluated every ``delta_test`` epochs.
    :param config: training configuration; only the value "spec" is special-cased here (the corpus
        names are then left out of the model file name).
    :param speakers_to_train_on: (str) optional bracketed list of training speakers; when empty the
        speakers are chosen by ``which_speakers_to_train_on``.
    :param speakers_to_valid_on: (str) optional bracketed list of validation speakers.
    :param relearn: (bool) when true, resume from an existing ``saved_models/<name>.pt`` if present.
    :return: (rmse_per_arti_mean, pearson_per_arti_mean) as returned by ``model.evaluate_on_test``.
    """
    corpus_to_train_on = corpus_to_train_on[1:-1].split(",")

    speakers_to_train_on = speakers_to_train_on[1:-1].replace("'", "").replace('"', '').replace(' ', '').split(",")
    if speakers_to_train_on == [""] or speakers_to_train_on == []:
        train_on = which_speakers_to_train_on(corpus_to_train_on, test_on, config)
    else:
        train_on = speakers_to_train_on

    speakers_to_valid_on = speakers_to_valid_on[1:-1].replace("'", "").replace('"', '').replace(' ', '').split(",")
    if speakers_to_valid_on == [""] or speakers_to_valid_on == []:
        valid_on = []
    else:
        valid_on = speakers_to_valid_on

    print('train', train_on)
    print('valid', valid_on)
    print('test', test_on)

    name_corpus_concat = ""
    if config != "spec":  # "spec" does not train on other speakers
        for corpus in corpus_to_train_on:
            name_corpus_concat = name_corpus_concat + corpus + "_"

    name_file = test_on+"_"+config+"_"+name_corpus_concat+"loss_"+str(loss_train)+"_filter_"+\
                str(filter_type)+"_bn_"+str(batch_norma)

    if not os.path.exists("saved_models"):
        os.mkdir("saved_models")

    # A ".txt" file marks a finished training run; count how many runs share this name.
    previous_models = os.listdir("saved_models")
    previous_models_2 = [x[:len(name_file)] for x in previous_models if x.endswith(".txt")]
    n_previous_same = previous_models_2.count(name_file)
    if n_previous_same > 0:
        print("this models has alread be trained {} times".format(n_previous_same))
    else:
        print("first time for this model")
    name_file = name_file + "_" + str(n_previous_same)
    print("going to train the model with name", name_file)

    cuda_avail = torch.cuda.is_available()
    print(" cuda ?", cuda_avail)
    if cuda_avail:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    hidden_dim = 300
    input_dim = 429
    batch_size = 10
    output_dim = 18
    early_stopping = EarlyStopping(name_file, patience=patience, verbose=True)
    model = my_ac2art_model(hidden_dim=hidden_dim, input_dim=input_dim, name_file=name_file, output_dim=output_dim,
                            batch_size=batch_size, cuda_avail=cuda_avail,
                            filter_type=filter_type, batch_norma=batch_norma)
    model = model.double()
    file_weights = os.path.join("saved_models", name_file + ".pt")
    if cuda_avail:
        model = model.to(device=device)

    # Always bind the flag: it was previously only assigned when ``relearn`` was true,
    # which left the name undefined on a fresh run.
    load_old_model = bool(relearn)
    if load_old_model:
        if os.path.exists(file_weights):
            print("previous model did not finish learning")
            loaded_state = torch.load(file_weights, map_location=device)
            # NOTE(review): this strict load happens before the filtering below, so a
            # mismatching checkpoint fails here; kept as in the original.
            model.load_state_dict(loaded_state)
            model_dict = model.state_dict()
            # keep only parameters that exist in the current model ...
            loaded_state = {k: v for k, v in loaded_state.items() if k in model_dict}
            # ... and whose shapes match
            loaded_state = {k: v for k, v in loaded_state.items() if loaded_state[k].shape == model_dict[k].shape}
            model_dict.update(loaded_state)
            model.load_state_dict(model_dict)

    files_per_categ, files_for_test = give_me_train_valid_test_filenames(train_on=train_on, test_on=test_on,
                                                                         config=config, batch_size=batch_size,
                                                                         valid_on=valid_on)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # A real list, so that the per-epoch shuffle below actually reorders the categories.
    categs_to_consider = list(files_per_categ.keys())
    with open('categ_of_speakers.json', 'r') as fp:
        # for each category: the speakers in it and the available articulators
        categ_of_speakers = json.load(fp)

    plot_filtre_chaque_epochs = False

    # The loss logs are closed even if training raises.
    with open('training_loss.csv', 'w') as f_loss_train, open('valid_loss.csv', 'w') as f_loss_valid:
        for epoch in range(n_epochs):
            weights = model.lowpass.weight.data[0, 0, :].cpu()
            if plot_filtre_chaque_epochs:
                plot_filtre(weights)
            n_this_epoch = 0
            random.shuffle(categs_to_consider)
            loss_train_this_epoch = 0
            loss_pearson = 0
            loss_rmse = 0
            for categ in categs_to_consider:
                files_this_categ_courant = files_per_categ[categ]["train"]
                random.shuffle(files_this_categ_courant)
                while len(files_this_categ_courant) > 0:  # go through all the files batch by batch
                    n_this_epoch += 1
                    x, y = load_np_ema_and_mfcc(files_this_categ_courant[:batch_size])
                    files_this_categ_courant = files_this_categ_courant[batch_size:]  # this batch is consumed
                    x, y = model.prepare_batch(x, y)
                    if cuda_avail:
                        x, y = x.to(device=model.device), y.to(device=model.device)
                    y_pred = model(x).double()
                    if cuda_avail:
                        y_pred = y_pred.to(device=device)
                    y = y.double()
                    optimizer.zero_grad()
                    if select_arti:
                        # list of 18 "0"/"1" flags: which articulators to consider
                        arti_to_consider = categ_of_speakers[categ]["arti"]
                        idx_to_ignore = [i for i, n in enumerate(arti_to_consider) if n == "0"]
                        # NOTE(review): intended to zero the gradient for these outputs -- to be checked
                        y_pred[:, :, idx_to_ignore] = 0

                    loss = criterion_both(y, y_pred, alpha=loss_train, cuda_avail=cuda_avail, device=device)
                    loss.backward()
                    optimizer.step()

                    # extra bookkeeping to follow both components of the loss
                    loss_2 = criterion_pearson(y, y_pred, cuda_avail=cuda_avail, device=device)
                    loss_pearson += loss_2.item()
                    loss_3 = torch.nn.MSELoss(reduction='sum')(y, y_pred)
                    loss_rmse += loss_3.item()

                    torch.cuda.empty_cache()
                    loss_train_this_epoch += loss.item()

            torch.cuda.empty_cache()

            loss_train_this_epoch = loss_train_this_epoch/n_this_epoch
            print("Training loss for epoch", epoch, ': ', loss_train_this_epoch)
            f_loss_train.write(str(epoch) + ',' + str(loss_train_this_epoch) + ',' +
                               str(loss_pearson/n_this_epoch/batch_size/18.*(-1.)) + ',' +
                               str(loss_rmse/n_this_epoch/batch_size) + '\n')

            if epoch % delta_test == 0:
                # every delta_test epochs: evaluate on the validation set
                loss_vali = 0
                n_valid = 0
                loss_pearson = 0
                loss_rmse = 0
                for categ in categs_to_consider:
                    files_this_categ_courant = files_per_categ[categ]["valid"]  # not trained on
                    while len(files_this_categ_courant) > 0:
                        n_valid += 1
                        x, y = load_np_ema_and_mfcc(files_this_categ_courant[:batch_size])
                        files_this_categ_courant = files_this_categ_courant[batch_size:]
                        x, y = model.prepare_batch(x, y)
                        if cuda_avail:
                            x, y = x.to(device=model.device), y.to(device=model.device)
                        y_pred = model(x).double()
                        torch.cuda.empty_cache()
                        if cuda_avail:
                            y_pred = y_pred.to(device=device)
                        y = y.double()  # (Batchsize, maxL, 18)
                        if select_arti:
                            arti_to_consider = categ_of_speakers[categ]["arti"]
                            idx_to_ignore = [i for i, n in enumerate(arti_to_consider) if n == "0"]
                            y_pred[:, :, idx_to_ignore] = 0
                        loss_courant = criterion_both(y, y_pred, loss_train, cuda_avail=cuda_avail, device=device)
                        loss_vali += loss_courant.item()
                        # to follow both losses
                        loss_2 = criterion_pearson(y, y_pred, cuda_avail=cuda_avail, device=device)
                        loss_pearson += loss_2.item()
                        loss_3 = torch.nn.MSELoss(reduction='sum')(y, y_pred)
                        loss_rmse += loss_3.item()

                loss_vali = loss_vali/n_valid
                # Normalise the validation RMSE by the number of validation batches
                # (the training batch count was used before).
                f_loss_valid.write(str(epoch) + ',' + str(loss_vali) + ',' +
                                   str(loss_pearson/n_valid/batch_size/18.*(-1.)) + ',' +
                                   str(loss_rmse/n_valid/batch_size) + '\n')

            # NOTE(review): the collapsed source does not show whether the statements below sat
            # inside the ``delta_test`` branch; they are kept at epoch level.
            torch.cuda.empty_cache()
            model.all_validation_loss.append(loss_vali)
            model.all_training_loss.append(loss_train_this_epoch)
            early_stopping(loss_vali, model)
            if early_stopping.early_stop:
                print("Early stopping, n epochs : ", model.epoch_ref + epoch)
                break

            if epoch > 0 and len(model.all_validation_loss) >= 2:
                # Halve the learning rate as soon as the validation loss gets worse. The current
                # loss was just appended, so the previous one is at index -2 (comparing with
                # index -1 could never be true).
                if loss_vali > model.all_validation_loss[-2]:
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = param_group['lr'] / 2

    if n_epochs > 0:
        model.epoch_ref = model.epoch_ref + epoch  # NOTE(review): check this when resuming a training
        model.load_state_dict(torch.load(os.path.join("saved_models", name_file+'.pt')))
        # a ".txt" file means: training finished
        torch.save(model.state_dict(), os.path.join("saved_models", name_file+".txt"))

    random.shuffle(files_for_test)
    x, y = load_np_ema_and_mfcc(files_for_test)
    print("evaluation on speaker {}".format(test_on))
    std_speaker = np.load(os.path.join(root_folder, "Preprocessing", "norm_values", "std_ema_"+test_on+".npy"))
    arti_per_speaker = os.path.join(root_folder, "Preprocessing", "articulators_per_speaker.csv")
    csv.register_dialect('myDialect', delimiter=';')
    with open(arti_per_speaker, 'r') as csvFile:
        reader = csv.reader(csvFile, dialect="myDialect")
        next(reader)  # skip the header row
        for row in reader:
            if row[0] == test_on:
                arti_to_consider = row[1:19]
                arti_to_consider = [int(flag) for flag in arti_to_consider]

    rmse_per_arti_mean, pearson_per_arti_mean = model.evaluate_on_test(x, y, std_speaker=std_speaker,
                                                                       to_plot=to_plot,
                                                                       to_consider=arti_to_consider)

    # ---- results on the validation set ----
    pearson_valid = np.zeros((1, output_dim))
    for categ in categs_to_consider:
        files_this_categ_courant = files_per_categ[categ]["valid"]
        while len(files_this_categ_courant) > 0:
            x, y = load_np_ema_and_mfcc(files_this_categ_courant[:batch_size])
            files_this_categ_courant = files_this_categ_courant[batch_size:]
            arti_to_consider = categ_of_speakers[categ]["arti"]
            rien, pearson_valid_temp = model.evaluate_on_test(x, y, std_speaker=1, to_plot=to_plot,
                                                              to_consider=arti_to_consider, verbose=False)
            pearson_valid_temp = np.reshape(np.array(pearson_valid_temp), (1, output_dim))
            pearson_valid = np.concatenate((pearson_valid, pearson_valid_temp), axis=0)
    pearson_valid = pearson_valid[1:, :]  # drop the zero row used to seed the concatenation
    pearson_valid[np.isnan(pearson_valid)] = 0
    pearson_valid = np.mean(pearson_valid, axis=0)
    print("on validation set :mean :\n", pearson_valid)
    print("training done for : ", name_file)

    articulators = ['tt_x', 'tt_y', 'td_x', 'td_y', 'tb_x', 'tb_y', 'li_x', 'li_y',
                    'ul_x', 'ul_y', 'll_x', 'll_y', 'la', 'lp', 'ttcl', 'tbcl', 'v_x', 'v_y']
    if not os.path.exists('model_results.csv'):
        with open('model_results.csv', 'a', newline="") as f:
            writer = csv.writer(f)
            header = ["name file", "test on", "configuration", "train on (if not spec)", "loss",
                      "n_epochs", "evaluation with...", "average"] + articulators
            writer.writerow(header)

    # write result in csv
    with open('model_results.csv', 'a', newline="") as f:
        writer = csv.writer(f)
        row_details = [name_file, test_on, config, name_corpus_concat, loss_train, model.epoch_ref]
        row_rmse = row_details + ["rmse_on_test", np.mean(rmse_per_arti_mean[rmse_per_arti_mean != 0])] +\
                   rmse_per_arti_mean.tolist()
        row_pearson = row_details + ["pearson_on_test", np.mean(pearson_per_arti_mean[pearson_per_arti_mean != 0])] +\
                      pearson_per_arti_mean.tolist()
        row_pearson_val = row_details + ["pearson_on_valid", np.mean(pearson_valid[pearson_valid != 0])] + \
                          pearson_valid.tolist()
        writer.writerow(row_rmse)
        writer.writerow(row_pearson)
        writer.writerow(row_pearson_val)

    weight_apres = model.lowpass.weight.data[0, 0, :].cpu()
    plot_allure_filtre = True
    if plot_allure_filtre:
        plot_filtre(weight_apres)

    return rmse_per_arti_mean, pearson_per_arti_mean
def format_time(start, end):
    """Return the elapsed time between two timestamps as ``HH:MM:SS.ss``.

    :param start: start time in seconds.
    :param end: end time in seconds.
    :return: zero-padded string; hours are not wrapped at 24.
    """
    hours, rem = divmod(end - start, 3600)
    minutes, seconds = divmod(rem, 60)
    execut_time = "{:0>2}:{:0>2}:{:05.2f}".format(int(hours), int(minutes), seconds)
    return execut_time


if __name__ == '__main__':
    # ``src``/``dst`` are used instead of ``input``/``output`` so the builtin is not shadowed.
    with open(INPUT_FILE, encoding="utf8", newline='') as src, \
            open(OUTPUT_FILE, 'w', newline='') as dst:
        writer = csv.writer(dst)
        writer.writerow(['videoId', 'text_density'])

        start = time.time()
        print(f'Start at {time.strftime("%H:%M")}')

        count = 0
        for row in csv.reader(src):
            # Blank lines come back as empty rows; skip them and the header row.
            if not row or row[0] == 'videoId':
                continue
            videoId = row[0].replace('/watch?v=', '')
            t = get_text_density(videoId)
            print(f'[{videoId}] Text density: ', t)
            writer.writerow([videoId, t])
            # Flush after each row so partial results survive an interruption.
            dst.flush()
            count += 1

        end = time.time()
        print(f'End at {time.strftime("%H:%M")}')
        print('Done in', format_time(start, end))
def handle(self, *args, **options):
    """Populate the database tables from the pipe-delimited files in ``content/``.

    Tables are filled in file order (Section, Division, Party, Roles, Commissions,
    Address, Deputy, Members, ReceptionSchedule, Assistants); later tables look up
    rows created by earlier ones. Progress is echoed to stdout.
    """
    separator = '============================================='

    def read_rows(path):
        # Yield the rows of one '|'-delimited UTF-8 file, keeping it open while consumed.
        with open(path, 'r', encoding='utf-8') as f:
            for record in csv.reader(f, delimiter='|'):
                yield record

    print('========== Заполнение базы данных ==========')

    print('====== Заполнение таблицы Section ==========')
    for row in read_rows('content/1_Section.csv'):
        Section.objects.create(name=row[0])
        print(row[0])
    print(separator)
    print()

    print('====== Заполнение таблицы Division ==========')
    for row in read_rows('content/2_Division.csv'):
        Division.objects.create(name=row[0], number=int(row[1]))
        print(row[0], row[1])
    print(separator)
    print()

    print('====== Заполнение таблицы Party =============')
    for row in read_rows('content/3_Party.csv'):
        Party.objects.create(name=row[0], propose_name=row[1], member_name=row[2])
        print(row[0])
    print(separator)
    print()

    print('====== Заполнение таблицы Roles =============')
    for row in read_rows('content/4_Roles.csv'):
        Roles.objects.create(name=row[0])
        print(row[0])
    print(separator)
    print()

    print('====== Заполнение таблицы Commissions =============')
    for row in read_rows('content/5_Commissions.csv'):
        Commissions.objects.create(name=row[0], description=row[1])
        print(row[0])
    print(separator)
    print()

    print('====== Заполнение таблицы Address =============')
    for row in read_rows('content/6_Address.csv'):
        Address.objects.create(name=row[0], full_address=row[1], address=row[2])
        print(row[0])
    print(separator)
    print()

    print('====== Заполнение таблицы Deputy =============')
    # Columns: surname | name | second_name | date_of_birth | address | site | telephone |
    #          email | skype | telegram | is_head | is_party | is_man | party_propose | division
    for position, row in enumerate(read_rows('content/7_Deputy.csv')):
        if position == 0:
            # The first line holds the column headers - skip it.
            continue
        party = Party.objects.get(name=row[13])
        division = Division.objects.get(number=row[14])
        deputy_fields = dict(
            surname=row[0], name=row[1], second_name=row[2],
            address=row[4], site=row[5], telephone=row[6], email=row[7],
            skype=row[8], telegram=row[9],
            is_head=row[10], is_party=row[11], is_man=row[12],
            party_propose=party, division=division,
        )
        if len(row[3]) > 0:
            # The birth date is optional; it is only set when the cell is non-empty.
            deputy_fields['date_of_birth'] = datetime.strptime(row[3], "%Y-%m-%d").date()
        Deputy.objects.create(**deputy_fields)
        print(row[0], row[1], row[2], row[3])
    print(separator)
    print()

    print('=== Заполнение таблицы Members ====')
    for row in read_rows('content/8_Members.csv'):
        deputy = Deputy.objects.get(surname=row[0])
        commissions = Commissions.objects.get(name=row[1])
        role = Roles.objects.get(name=row[2])
        Members.objects.create(deputy=deputy, role=role, commissions=commissions)
        print(row[0], row[1], row[2])
    print(separator)
    print()

    print('=== Заполнение таблицы ReceptionSchedule ====')
    for row in read_rows('content/9_Schedule.csv'):
        deputy = Deputy.objects.get(surname=row[0])
        address = Address.objects.get(name=row[4])
        start = datetime.strptime(row[2], "%H:%M").time()
        end = datetime.strptime(row[3], "%H:%M").time()
        reception_day = datetime.strptime(row[1], "%Y-%m-%d").date()
        ReceptionSchedule.objects.create(date=reception_day, start_time=start, end_time=end,
                                         deputy=deputy, address=address)
        print(row[0], row[1], row[2], row[3], row[4])
    print(separator)
    print()

    print('=== Заполнение таблицы Assistants ====')
    for row in read_rows('content/10_Assistants.csv'):
        deputy = Deputy.objects.get(surname=row[0])
        Assistants.objects.create(surname=row[1], name=row[2], second_name=row[3], deputy=deputy)
        print(row[0], row[1], row[2], row[3])
    print(separator)
# (first index, one-past-last index, (dx, dy) offsets) for each group of points.
# The first offset of every group is (0, 0), i.e. the point itself.
_TEXTUAL_POSITION_GROUPS = (
    (0, 17, (
        (0, 0), (-2, 2), (-2, 1), (-2, 0), (-1, -2), (0, -2), (1, -2), (2, 0), (2, 1),
        # NOTE(review): the last offset repeats (2, 1); (2, 2) may have been intended.
        # Kept as in the original so results do not change.
        (2, 1),
    )),
    (17, 22, ((0, 0), (-2, -1), (1, 0), (2, -1))),
    (22, 27, ((0, 0), (-2, -1), (-1, 0), (2, -1))),
    (27, 36, ((0, 0), (0, 1), (0, 2), (-2, 0), (-1, -1), (0, -1), (1, -1), (2, 0))),
    (36, 48, ((0, 0), (-2, 0), (-2, 1), (0, 1), (1, 1), (2, 0), (1, -1), (0, -1))),
    (48, 68, (
        (0, 0), (-2, 0), (-1, 1), (0, 1), (1, 1), (2, 0), (1, -1), (0, -1), (-1, -1),
        (-1, 2), (0, 2), (1, 2), (1, -2), (0, -2), (-1, -2),
    )),
)


def textual_position(path):
    """Expand each row of 68 points in a CSV file into a list of neighbouring positions.

    Every cell of the file must look like ``(a,b)`` (the text between the parentheses is
    parsed as two integers). For each row, every point is emitted together with a fixed
    set of offset neighbours that depends on the point's index (see
    ``_TEXTUAL_POSITION_GROUPS``), giving 678 tuples per row. Finally each tuple is
    clamped: the first component to ``[0, 479]``; the second component is raised to 0
    when negative and set to 479 when it is ``>= 680``.

    :param path: path of the CSV file.
    :return: list (one entry per row) of lists of ``(int, int)`` tuples.
    :raises IndexError: if a row has fewer than 68 cells or a cell is malformed.
    """
    # The context manager closes the file; it used to be left open.
    with open(path) as handle:
        data = [row for row in csv.reader(handle)]

    # Parse every cell "(a,b)" into an integer tuple.
    for row in data:
        for j, cell in enumerate(row):
            vec = re.split(r'[,()]', cell)
            row[j] = (int(vec[1]), int(vec[2]))

    textual_position_list = []
    for d in data:
        positions = []
        for first, last, offsets in _TEXTUAL_POSITION_GROUPS:
            for i in range(first, last):
                px, py = d[i]
                for dx, dy in offsets:
                    positions.append((px + dx, py + dy))
        textual_position_list.append(positions)

    for positions in textual_position_list:
        for j, (x, y) in enumerate(positions):
            x = min(max(x, 0), 479)
            if y < 0:
                y = 0
            elif y >= 680:
                # NOTE(review): threshold 680 with replacement 479 looks inconsistent
                # (679 or a 640/639 pair may have been intended); preserved as-is.
                y = 479
            positions[j] = (x, y)
    return textual_position_list
def convert_csv_import(cr, module, fname, csvcontent, idref=None, mode='init', noupdate=False):
    '''Import csv file :
        quote: "
        delimiter: ,
        encoding: utf-8

    The model name is derived from ``fname`` (directory, extension and any
    ``-suffix`` removed). The first CSV row holds the field names; the remaining
    non-empty rows are passed to the model's ``import_data``. When the
    ``import_partial`` option is set, a pickle file at that path records
    per-file progress so an interrupted import can skip rows already loaded.

    :raises Exception: if ``import_data`` reports a negative result.
    '''
    if not idref:
        idref = {}
    model = ('.'.join(fname.split('.')[:-1]).split('-'))[0]
    # remove folder path from model
    head, model = os.path.split(model)

    pool = pooler.get_pool(cr.dbname)

    csv_stream = cStringIO.StringIO(csvcontent)  # FIXME
    reader = csv.reader(csv_stream, quotechar='"', delimiter=',')
    fields = next(reader)
    fname_partial = ""
    partial_path = config.get('import_partial')
    if partial_path:
        fname_partial = module + '/' + fname
        if not os.path.isfile(partial_path):
            with open(partial_path, 'wb') as partial_file:
                pickle.dump({}, partial_file)
        else:
            # NOTE(review): pickle.load trusts the file content; the progress file
            # must only ever be written by this process.
            with open(partial_path, 'rb') as partial_file:
                data = pickle.load(partial_file)
            if fname_partial in data:
                if not data[fname_partial]:
                    # 0 means this file was already imported completely
                    return
                else:
                    # skip the rows imported by a previous, interrupted run
                    for i in range(data[fname_partial]):
                        next(reader)

    if not (mode == 'init' or 'id' in fields):
        _logger.error(
            "Import specification does not contain 'id' and we are in init mode, Cannot continue."
        )
        return

    uid = 1
    datas = []
    for line in reader:
        # ignore empty lines and lines whose cells are all empty
        if (not line) or not any(line):
            continue
        try:
            datas.append([misc.ustr(cell) for cell in line])
        except Exception:
            # best effort: a row that cannot be decoded is logged and skipped
            _logger.error("Cannot import the line: %s", line)

    result, rows, warning_msg, dummy = pool.get(model).import_data(
        cr, uid, fields, datas, mode, module, noupdate, filename=fname_partial)
    if result < 0:
        # Report failed import and abort module install
        raise Exception(
            _('Module loading %s failed: file %s could not be processed:\n %s') %
            (module, fname, warning_msg))
    if partial_path:
        with open(partial_path, 'rb') as partial_file:
            data = pickle.load(partial_file)
        data[fname_partial] = 0
        with open(partial_path, 'wb') as partial_file:
            pickle.dump(data, partial_file)
        cr.commit()
def main(): parser = argparse.ArgumentParser() parser.add_argument('-d', type=int, action='store', dest='data_num', help='choose which data set to use') parser.add_argument('-m', type=int, action='store', dest='filter_method', help='choose which method to filter data') if len(sys.argv) != 5: print 'Command e.g.: python filterUserAndLocationByFreq.py -d 0(1,2) -m 0(1)' sys.exit(1) para = parser.parse_args() if para.data_num == 0: checkin_infile = settings["ROOT_PATH"] + settings["CHECKIN_PAIR_FILE1"] poi_infile = settings["ROOT_PATH"] + settings["SRC_DATA_FILE1_1"] checkin_outfile = settings["ROOT_PATH"] + settings["FILTER_CHECKIN_PATR_FILE1"] loc_latlng = loadPoiInfo(poi_infile, para.data_num) elif para.data_num == 1: checkin_infile = settings["ROOT_PATH"] + settings["CHECKIN_PAIR_FILE2"] poi_infile = settings["ROOT_PATH"] + settings["SRC_DATA_FILE2_1"] checkin_outfile = settings["ROOT_PATH"] + settings["FILTER_CHECKIN_PATR_FILE2"] loc_latlng = loadPoiInfo(poi_infile, para.data_num) elif para.data_num == 2: checkin_infile = settings["ROOT_PATH"] + settings["CHECKIN_PAIR_FILE3"] poi_infile = settings["ROOT_PATH"] + settings["SRC_DATA_FILE3_3"] checkin_outfile = settings["ROOT_PATH"] + settings["FILTER_CHECKIN_PAIR_FILE3"] loc_latlng = loadPoiInfo(poi_infile, para.data_num) else: print 'Invalid choice of data set' sys.exit(1) # Filtering uid_set = set([]) pid_set = set([]) if para.filter_method == 0: pid_uid = defaultdict(set) uid_pid = defaultdict(set) tag = False for line in csv.reader(open(checkin_infile)): if not tag: tag = True continue entry = map(int, line[:-1]) uid, pid1, pid2 = entry[0], entry[1], entry[4] if pid1 in loc_latlng: pid_uid[pid1].add(uid) uid_pid[uid].add(pid1) if pid2 in loc_latlng: pid_uid[pid2].add(uid) uid_pid[uid].add(pid2) removed_pid = set([]) removed_uid = set([]) while True: removed_pid.clear() for pid in pid_uid: pid_uid[pid] = pid_uid[pid] - removed_uid if len(pid_uid[pid]) < settings["FILTER_LOCATION_VISIT_NUM"]: removed_pid.add(pid) 
for pid in removed_pid: pid_uid.pop(pid) removed_uid.clear() for uid in uid_pid: uid_pid[uid] = uid_pid[uid]-removed_pid if len(uid_pid[uid]) < settings["FILTER_USER_VISIT_NUM"]: removed_uid.add(uid) for uid in removed_uid: uid_pid.pop(uid) if len(removed_uid) == 0: uid_set = set(uid_pid.keys()) pid_set = set(pid_uid.keys()) uid_pid = None pid_uid = None break elif para.filter_method == 1: data = [entry for entry in csv.reader(open(checkin_infile))] data = [map(int, entry[:-1]) for entry in data[1:]] pid_record = defaultdict(set) uid_record = defaultdict(set) for i, entry in enumerate(data): uid, pid1, pid2 = entry[0], entry[1], entry[4] if pid1 in loc_latlng and pid2 in loc_latlng: uid_record[uid].add(i) pid_record[pid1].add(i) pid_record[pid2].add(i) removed_record = set([]) removed_pid = set([]) removed_uid = set([]) while True: for idx in removed_record: uid, pid1, pid2 = data[idx][0], data[idx][1], data[idx][4] pid_record[pid1] = pid_record[pid1] - set([idx]) pid_record[pid2] = pid_record[pid2] - set([idx]) removed_record.clear() for pid in pid_record: if len(pid_record[pid]) < settings["FILTER_LOCATION_RECORD_NUM"]: removed_pid.add(pid) for idx in pid_record[pid]: removed_record.add(idx) for pid in removed_pid: pid_record.pop(pid) removed_pid.clear() for uid in uid_record: if len(uid_record[uid]) < settings["FILTER_USER_RECORD_NUM"]: removed_uid.add(uid) for idx in uid_record[uid]: removed_record.add(idx) for uid in removed_uid: uid_record.pop(uid) removed_uid.clear() print "Removed Record Number: %d" % len(removed_record) if len(removed_record) == 0: uid_set = set(uid_record.keys()) pid_set = set(pid_record.keys()) data = None uid_record = None pid_record = None break tag = False with open(checkin_outfile, "w") as wfp: writer = csv.writer(wfp, lineterminator="\n") for entry in csv.reader(open(checkin_infile)): if not tag: tag = True writer.writerow(entry) else: uid, pid1, pid2 = map(int, [entry[0], entry[1], entry[4]]) if uid in uid_set and pid1 in pid_set 
and pid2 in pid_set: writer.writerow(entry)
import csv csvpath = os.path.join("Resources", "budget_data.csv") # parameters total_months = 0 months_changes = [] changes = [] greatest_increase = ["", 0] # date & number greatest_decrease = [ "", 99999999 ] # date & number 9999999 is represents the limit of decrease total = 0 with open(csvpath, newline='') as csvfile: csvreader = csv.reader(csvfile, delimiter=',') header = next(csvreader) first_row = next(csvreader) # ---------------------------------------------------------------- total_months = total_months + 1 total = total + int(first_row[1]) previous_month = int(first_row[1]) for row in csvreader: total_months = total_months + 1 total = total + int( row[1]) # not called first_row becasue it's already "used" change = int(row[1]) - previous_month # current_month - previous_month previous_month = int(
# --------------------------------------------------
# Command-line entry point: turn a colour CSV into a Python module of
# ``NAME = (r, g, b)`` constants, optionally downloading the CSV first.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", "-i", type=str, default="colors.csv", help="Input file (CSV)")
    parser.add_argument("--output", "-o", type=str, default="colors.py", help="Output file (Python)")
    parser.add_argument("--download", "-d", action="store_true", help="Download from Wikipedia")
    args = parser.parse_args()

    if args.download:
        download_colors(args.input)

    with open(args.input, "r") as input_file, open(args.output, "w") as output_file:
        for row in csv.reader(input_file):
            # First column is the colour name, the last three are the RGB components.
            name = row[0].upper()
            rgb = tuple(int(row[k]) for k in (-3, -2, -1))
            output_file.write(f"{name} = {rgb}\n")
import numpy as np
import matplotlib.pyplot as plt
import csv
from sklearn import model_selection
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report

# In[2]:

# Column 13 of every row of the preprocessed data set goes into X and column 5
# into Y, so X[k] and Y[k] describe the same row.
# NOTE(review): the original comments call X the text and Y the newsgroup category.
included_cols = [5, 13]
X, Y = [], []
with open('Dataset_preprocessed.csv', "r", encoding="utf8") as f:
    reader = csv.reader(f)
    for row in reader:
        content = [row[col] for col in included_cols]
        label, text = content
        X.append(text)
        Y.append(label)

# In[3]:
# -*- coding: utf-8 -*-
# Convert data/CUIs.csv (columns: cui, label, lang) into data/umlsDump.json,
# one JSON document per line, each wrapped in a "_source" object.
import csv
import json

# The context manager closes the input even when reading fails.
with open("data/CUIs.csv", 'r', encoding="utf-8") as f:
    reader = csv.reader(f, delimiter=',')
    cuis_lang = list(reader)

# All rows are converted before the output file is opened, so a malformed row
# does not leave a truncated dump behind.
rows = []
for row in cuis_lang:
    index = dict()
    index["_source"] = {'cui': row[0], 'label': row[1], 'lang': row[2]}
    rows.append(index)

with open("data/umlsDump.json", "w", encoding="utf-8") as output:
    for row in rows:
        output.write(json.dumps(row))
        output.write('\n')
# Download the CSV to ``localFilePath``, then list its rows (price, date,
# postcode) sorted by price, highest first.
try:
    # Both the HTTP response and the local file are closed even if the
    # transfer fails half-way.
    with urllib.request.urlopen(webRequest) as page:
        content = page.read()
    with open(localFilePath, "wb") as output:
        output.write(content)
except urllib.request.HTTPError as err:
    print(err.fp.read())

if os.path.exists(localFilePath):
    print("file exists")
    lineNum = 0
    listOfLists = []
    with open(localFilePath, "r") as csvFile:
        lineReader = csv.reader(csvFile)
        for row in lineReader:
            # columns 1..3 hold price, date and postcode
            price = int(row[1])
            date = row[2]
            postcode = row[3]
            oneResultRow = [price, date, postcode]
            listOfLists.append(oneResultRow)
    print("done with file")
    listOfListsSorted = sorted(listOfLists, key=lambda x: x[0], reverse=True)
    print(listOfListsSorted)
else:
    print("no file")
def _extend(filename, n, keys=()): """ For internal use only. Extend a file. :param file: str :param n: int :param keys: tuple :return: str, set """ with open(filename, 'r') as file: header = file.readline() reader = csv.reader(file) lines = [_ for _ in reader] fname = f"{filename}_{n}.csv" with open(fname, 'w') as file: file.write(header) for line in lines: file.write(','.join(line) + '\n') # file.writelines([','.join(x) for x in lines]) # file.write('\n') if not keys: these_keys = set([line[0].strip() for line in lines]) else: these_keys = set() n = n // 5 for i in range(n): for line in lines: mod_words = line[:] if keys: # Use provided users and products uid = random.choice(keys[0]) pid = random.choice(keys[1]) counter = 0 while (uid, pid) in these_keys: uid = random.choice(keys[0]) pid = random.choice(keys[1]) if counter > 100: break if (uid, pid) in these_keys: continue file.write(f"{uid}, {pid}, {random.randint(1, int(mod_words[-1].strip()) * 2)}\n") else: mod_key = ''.join([random.choice(string.ascii_letters) for _ in range(len(mod_words[0]))]) while mod_key.strip() in these_keys: mod_key = ''.join([random.choice(string.ascii_letters) for _ in range(len(mod_words[0]))]) these_keys.add(mod_key) mod_words[0] = mod_key for j, word in enumerate(line[1:], 1): # If a phone number, randomize digits if re.match(r"\d{3}-\d{3}-\d{4}", word.strip()): num = f"{random.randint(0, 9999999999):09d}" mod_words[j] = num[:3] + '-' + num[3:6] + '-' + num[-4:] # If a number, randomize elif re.fullmatch(r"\d*", word.strip()): num = random.randint(1, int(word.strip()) * 2) mod_words[j] = str(num) else: # Replace 1/2 of characters with random digits mod_locs = [random.randint(0, len(word) - 1) for _ in range(len(word) // 2)] lst = list(word) for loc in mod_locs: lst[loc] = random.choice(string.ascii_letters) mod_words[j] = ''.join(lst) file.write(','.join(mod_words) + '\n') # file.writelines([]) for line in lines]) return fname, these_keys
def clock_in():
    """
    Verify a student by face and record attendance for the selected subject.

    The subject is read from the drop-down menu (``clicked``). Webcam frames
    are scanned with the Haar cascade detector and the LBPH model stored in
    ``trainer/trainer.yml`` until ``period`` seconds have passed, 'q' is
    pressed, or a frame cannot be read. The details of the last face
    recognised with confidence below 40 are appended to today's attendance
    CSV and inserted into the ``attends`` table. If no face was recognised
    nothing is written and the user is notified.
    """
    notifier.configure(
        text='CONSOLE: Analysing and Rendering Facial Features..')
    start = time.time()
    period = 10
    face_cascade = cv2.CascadeClassifier(
        'assets/haarcascade_frontalface_default.xml')
    # Taking input from drop down menu
    subjectchoice = (clicked.get())
    cap = cv2.VideoCapture(0)
    # try/finally: the camera and preview window are released on every exit
    # path, including errors and the "nobody recognised" early return.
    try:
        recognizer = cv2.face.LBPHFaceRecognizer_create()
        recognizer.read('trainer/trainer.yml')
        font = cv2.FONT_HERSHEY_SIMPLEX
        col_names = ['Id', 'Name', 'Gender', 'Age', 'Phone',
                     'Address', 'Subject', 'Date', 'Time']
        df = pd.read_csv("student_details/student_details.csv")

        # Stays None until a face is recognised; checked before any write.
        attendance = None
        ID = None
        timeStamp = None
        while True:
            ret, frame = cap.read()
            if not ret:
                # No frame available; cvtColor would raise on it.
                break
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(gray, 1.3, 7)
            for (x, y, w, h) in faces:
                # roi is the region of interest: rows y..y+h and columns
                # x..x+w of the gray frame
                roi_gray = gray[y:y + h, x:x + w]
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                serial, confidence = recognizer.predict(roi_gray)
                if confidence < 40:
                    ts = time.time()
                    date = datetime.datetime.fromtimestamp(ts).strftime('%d-%m-%Y')
                    timeStamp = datetime.datetime.fromtimestamp(
                        ts).strftime('%H:%M:%S')
                    student = df.loc[df['SERIAL NO.'] == serial]
                    # str() of each value array is sliced to drop the
                    # surrounding brackets ([1:-1]) or brackets plus quotes
                    # ([2:-2]).
                    ID = str(student['ID'].values)[1:-1]
                    name2 = str(student['NAME'].values)[2:-2]
                    gender2 = str(student['GENDER'].values)[2:-2]
                    age2 = str(student['AGE'].values)[1:-1]
                    phonenumber2 = str(student['PHONE NUMBER'].values)[1:-1]
                    address2 = str(student['ADDRESS'].values)[2:-2]
                    attendance = [str(ID), name2, gender2, age2, phonenumber2,
                                  address2, subjectchoice, str(date), str(timeStamp)]
                else:
                    name2 = '\n Unable To Recognize This Entity! \n'
                cv2.putText(frame,
                            "Name : " + str(name2) + " Confidence (Lower the Better): "
                            + str(int(confidence)),
                            (x, y - 10), font, 1, (120, 255, 120), 4)
            cv2.imshow('frame', frame)
            if time.time() > start + period:
                break
            if cv2.waitKey(100) & 0xFF == ord('q'):
                break

        if attendance is None:
            # Nothing to record; previously this path hit a NameError below.
            notifier.configure(
                text='CONSOLE: Unable To Recognize Any Student, Attendance Not Marked..')
            return

        ts = time.time()
        date = datetime.datetime.fromtimestamp(ts).strftime('%d-%m-%Y')
        # Writing to daily_generated_attendance_csv + date + .csv to display
        # in GUI and also as a CSV. The open mode is left as 'a+' without
        # newline='' on purpose: other readers of this file may rely on the
        # existing row layout.
        attendance_path = ("daily_generated_attendance_csv/daily_generated_attendance_csv_"
                           + date + ".csv")
        is_new_file = not os.path.isfile(attendance_path)
        with open(attendance_path, 'a+') as csvFile1:
            writer = csv.writer(csvFile1)
            if is_new_file:
                writer.writerow(col_names)
            writer.writerow(attendance)

        # Inserting Details to SQL Database, i.e. writing the recognised
        # student and the drop down menu choice to sql. Unknown subjects fall
        # back to 'PyAB'.
        subject_ids = {'Python': 'PyAB', 'DBMS': 'DbKM', 'TCS': 'TcSY', 'OS': 'OsMR'}
        subid = subject_ids.get(subjectchoice, 'PyAB')
        # NOTE(review): ID already had its brackets stripped above, so this
        # removes one more character from each end (presumably the quotes of
        # a text ID) -- confirm this is intended for numeric IDs.
        myId = str(ID)
        myId = myId[1:-1]
        cursor.execute(
            'INSERT INTO attends(studid, subid, attdate, atttime) VALUES(%s,%s,%s,%s)',
            (myId, subid, str(date), str(timeStamp)))
        conn.commit()
        print("Attendance inserted")
    finally:
        cap.release()
        cv2.destroyAllWindows()
    notifier.configure(
        text='CONSOLE: Thank You! Please Check the Attendance Sheet..')
    csv_updater()
import csv
import numpy as np

# For every row of sample.csv, write the text in column 1 to its own file
# named after the zero-padded integer in column 0.
with open('sample.csv', newline='', errors='ignore') as csvfile:
    rows = csv.reader(csvfile)
    for row in rows:
        if not row:
            # csv.reader yields [] for blank lines; nothing to write.
            continue
        a = row[0]
        b = row[1]
        a = '%03d' % int(a)
        txt_path = './test/txt/p225/' + 'p225_' + a + '.txt'
        print(txt_path)
        # Context manager closes the file even if the write fails.
        with open(txt_path, 'w') as f:
            f.write(b)
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 28 14:38:35 2020

@author: Slayer20
"""

import csv
from collections import Counter

exp = []
imp = []

# Raw string: in a normal literal "\U" starts a unicode escape, which makes
# this path a SyntaxError on Python 3.
with open(r"C:\Users\Slayer20\Downloads\Course Resources\synergy_logistics_database.csv", "r") as archivo_csv:
    lector = csv.reader(archivo_csv)
    # Skip the header row (previously removed afterwards with pop(0)).
    next(lector, None)
    for linea in lector:
        exp.append(linea[2])
        imp.append(linea[3])

# Pair column 2 with column 3 of every data row. zip() is materialised
# because a Python 3 zip object has no len(), indexing or count().
agrup = list(zip(exp, imp))

# com[k] is the k-th pair and cont[k] is how many times that pair occurs in
# the whole file. Counter tallies all pairs in a single pass.
conteo = Counter(agrup)
com = list(agrup)
cont = [conteo[par] for par in agrup]
def capture_img():
    """
    Capture face images of a new student and register their details.

    Makes sure the working directories exist, works out the next serial
    number from ``student_details/student_details.csv`` (creating the file
    with a header row if needed) and reads the student's details from the
    text fields. If the entered id is already present in the
    ``student_details`` table the user is told to update manually. Otherwise
    cropped grayscale face images are written to ``dataset/`` until 100 have
    been saved or 'q' is pressed, and the details are appended to the CSV
    and inserted into the database.
    """
    notifier.configure(text='CONSOLE: Capturing Images.. Creating A Dataset..')
    # Firstly make sure all the directories are present
    path_existence("dataset/")
    path_existence("daily_generated_attendance_csv/")
    path_existence("student_details/")
    path_existence("trainer/")
    path_existence("trainer/trainer.yml")
    columns = ['SERIAL NO.', 'ID', 'NAME', 'GENDER',
               'AGE', 'PHONE NUMBER', 'ADDRESS']

    details_path = "student_details/student_details.csv"
    if os.path.isfile(details_path):
        # The serial number is the number of rows csv.reader yields,
        # header included.
        with open(details_path, 'r') as csvFile1:
            serial = sum(1 for _ in csv.reader(csvFile1))
    else:
        with open(details_path, 'a+') as csvFile1:
            csv.writer(csvFile1).writerow(columns)
        serial = 1

    Id = (txtfield1.get())
    name = (txtfield2.get())
    gender = (txtfield3.get())
    age = (txtfield4.get())
    phonenumber = (txtfield5.get())
    address = (txtfield6.get())

    # Checking if user with same id exists, if yes then update
    cursor.execute('select * from student_details')
    existing_ids = [0] + [record[0] for record in cursor.fetchall()]
    idchecker = Id in existing_ids

    if idchecker:
        # User exists, hence needs to be updated
        notifier.configure(
            text='CONSOLE: User already exists, Please Update Manually In The View Database section..')
        return

    # User doesnt exist, hence needs to be added
    cap = cv2.VideoCapture(0)
    # try/finally: release the camera and close the preview window on every
    # exit path, including errors.
    try:
        face_cascade = cv2.CascadeClassifier(
            'assets/haarcascade_frontalface_default.xml')
        count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                # No frame available; cvtColor would raise on it. Nothing is
                # registered in this case.
                notifier.configure(
                    text='CONSOLE: Unable To Read From Camera, Student Not Added..')
                return
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # args: img, scaleFactor, minNeighbors
            face_rects = face_cascade.detectMultiScale(gray, 1.3, 5)
            for (x, y, w, h) in face_rects:
                # Note: Unlike matplotlib.pyplot, cv2 takes images as BGR
                # instead of RGB
                cv2.rectangle(frame, (x, y), (x + w, y + h), (225, 206, 128), 2)
                count += 1
                cv2.imwrite("dataset/ " + name + "." + str(serial) + "." + Id + '.'
                            + str(count) + ".jpg", gray[y:y + h, x:x + w])
            cv2.imshow('Learning Your Face', frame)
            if cv2.waitKey(100) & 0xFF == ord('q'):
                break
            elif count >= 100:
                notifier.configure(
                    text='CONSOLE: Images Have Been Captured Successfully..')
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

    # Saving Details to student_details.csv
    row = [serial, Id, name, gender, age, phonenumber, address]
    with open('student_details/student_details.csv', 'a+') as csvFile:
        csv.writer(csvFile).writerow(row)

    # Inserting Details to SQL Database
    cursor.execute(
        'INSERT INTO student_details(studid, stdname, gender, age, phoneno, address) VALUES(%s,%s,%s,%s,%s,%s)',
        (Id, name, gender, age, phonenumber, address))
    conn.commit()
    print("Record inserted")
easy_install_ep = [k for k in console if re.match(r'easy_install(-\d\.\d)?$', k)] for k in easy_install_ep: del console[k] # Generate the console and GUI entry points specified in the wheel if len(console) > 0: generated.extend(maker.make_multiple(['%s = %s' % kv for kv in console.items()])) if len(gui) > 0: generated.extend(maker.make_multiple(['%s = %s' % kv for kv in gui.items()], {'gui': True})) record = os.path.join(info_dir[0], 'RECORD') temp_record = os.path.join(info_dir[0], 'RECORD.pip') with open_for_csv(record, 'r') as record_in: with open_for_csv(temp_record, 'w+') as record_out: reader = csv.reader(record_in) writer = csv.writer(record_out) for row in reader: row[0] = installed.pop(row[0], row[0]) if row[0] in changed: row[1], row[2] = rehash(row[0]) writer.writerow(row) for f in generated: h, l = rehash(f) writer.writerow((f, h, l)) for f in installed: writer.writerow((installed[f], '', '')) shutil.move(temp_record, record) def _unique(fn): @functools.wraps(fn)
import csv
import os
import logging
import yaml

pat_dict = {}
visit_dict = {}

input_file = os.path.join(os.path.dirname(__file__), "..", "config", "identifiers.csv")
output_file = os.path.join(os.path.dirname(__file__), "..", "config", "identifiers.yml")

with open(input_file, newline="") as f:
    csvreader = csv.reader(f, delimiter=",", quotechar="\"")
    # Discard the first five rows of the sheet.
    for _ in range(5):
        next(csvreader)
    # Drop the first cell of every remaining row, then transpose so that each
    # entry of rowst is one column of the sheet.
    rowst = [list(column) for column in zip(*(record[1:] for record in csvreader))]

for row in rowst:
    # Remove spaces and non-breaking spaces, and turn "=" into ":".
    cleaned = [
        cell.replace(" ", "").replace(u"\xa0", "").replace("=", ":")
        for cell in row
    ]
    pat, visit, *row1 = cleaned
    # Keep only the "key:value" style cells, upper-cased.
    ids = [entry.upper() for entry in row1 if ":" in entry]
    if pat not in ("N/A", ""):
        pat_dict[pat] = ids
    if visit not in ("N/A", ""):
        visit_dict[visit] = ids
data_folder_normalized = "users/data_normalized/" ''' features=np.concatenate(([np.array(hardCodedPressure)[:,1]],[np.array(accMomVariance)[:,1]],[np.array(accBabyVariance)[:,1]],[np.array(preMomVariance)[:,1]],[np.array(preBabyVariance)[:,1]],[np.array(accCorrelation)[:,1]], \ [np.array(accDifference)[:,1]],[np.array(preDifference)[:,1]],[np.array(selfAccDiffMom)[:,1]],[np.array(selfAccDiffBaby)[:,1]],[np.array(selfPreDiffMom)[:,1]], \ [np.array(selfPreDiffBaby)[:,1]],[np.array(preCorrelation)[:,1]],[np.array(filteredPreMom)[:,1]],[np.array(filteredPreBaby)[:,1]]),axis=0) ''' if __name__ == '__main__': files = os.listdir(data_folder) for f in files: _data = [] file = open(data_folder + str(f), "r") reader = csv.reader(file) for row in reader: _data.append(map(float, row)) file.close() _data = np.array(_data) for i in range(15): if i != 0 and i != 5 and i != 12: #print _data[:, i] if np.max(_data[:, i]) - np.min(_data[:, i]) == 0: _data[:, i] = 0.5 else: _data[:, i] = (_data[:, i] - np.min(_data[:, i])) / (
def get_reader(self, csv_data, **reader_kwargs):
    """Yield one ``csv.reader`` that parses the in-memory text ``csv_data``.

    Any extra keyword arguments are passed through to ``csv.reader``.
    """
    buffer = StringIO(csv_data)
    yield csv.reader(buffer, **reader_kwargs)
# Split the bam file by cluster ID. # Credited to https://divingintogeneticsandgenomics.rbind.io/post/split-a-10xscatac-bam-file-by-cluster/ import os import sys import pysam import csv cluster_file = sys.argv[1] bam_file = sys.argv[2] output_location = sys.argv[3] output_prefix = sys.argv[4] cluster_dict = {} with open(cluster_file) as csv_file: csv_reader = csv.reader(csv_file, delimiter='\t') # skip header header = next(csv_reader) for row in csv_reader: cluster_dict[row[0]] = row[1] clusters = set(x for x in cluster_dict.values()) fin = pysam.AlignmentFile(bam_file, "rb") # open the number of bam files as the same number of clusters, and map the out file handler to the cluster id, # write to a bam with wb fouts_dict = {} for cluster in clusters: output_filename = os.path.join(output_location, "{}_{}.bam".format(output_prefix, cluster))
import csv import models, datasets labelfile = "../resources/labels.csv" testfile = "../resources/test.csv" # testfile = "../resources/train.csv" with open(labelfile, newline='') as csvfile: data = list(csv.reader(csvfile)) label_dic = {} for s in data: label_dic[s[0]] = s[1] # print (label_dic) with open(testfile, newline='') as csvfile: data = list(csv.reader(csvfile)) total = 0 correct = 0 for s in data: right_label = label_dic[s[0]] message = s[1] intent, probability = models.classify_intent( models.get_classifier(), models.get_vectorizer(),
help='notifications file') parser.add_argument('-c', '--codes', type=argparse.FileType('r'), help='Optional file with codes. If present, the ' 'file will be populated with codes. ' 'No codes will be sent') parser.add_argument('--config', type=str, dest='config_file', default='config.ini', help='config file (detaulf config.ini)') args = parser.parse_args() # Read configuration if not os.path.isfile(args.config_file): print(f'Config file {args.config_file} is missing. Aborting') exit(1) with open(args.config_file) as fp: config = configparser.ConfigParser() config.read_file(fp) # Read data reader = csv.DictReader(args.notif_file) data = list(reader) codes = None send = True if args.codes: codes = [code_line[0] for code_line in csv.reader(args.codes)] send = False main(data=data, codes=codes, notif_file=args.notif_file, config=config, send=send)
def preprocess(filename):
    """Read the CSV file ``filename`` and return its rows.

    The file object is handed straight to ``csv.reader`` and opened with
    ``newline=''`` so that line breaks inside quoted fields are preserved
    instead of being split into separate rows.

    :param filename: path of the CSV file to read
    :return: list of rows, each row a list of strings
    """
    with open(filename, 'r', newline='') as f:
        return list(csv.reader(f))
'Retailers List/Business Data/processed_data/WA' FDA_dir = 'C:/Users/lpatterson/AnacondaProjects/Tribal_Master' fda_df = pd.read_excel(FDA_dir + '/input/Public retail data_original.xlsx') fda_df = fda_df.loc[fda_df['State'] == 'WA', :] fda_df.reset_index(inplace=True) # load WA tab-delimited data sets os.chdir(WA_rawdir) rawfiles = [] for file in glob.glob("*.txt"): rawfiles.append(file) for k in rawfiles: # some inconsistencies with the tab delimited data, so we need to first process with csv file = open(k, 'rt', encoding="utf8") reader = csv.reader(file, delimiter='\t', quotechar=None) csv_list = [] for m, l in enumerate(reader): if m % 100000 == 0: print(m) csv_list.append(l) # make dataframe with first row as column headers biz_df = pd.DataFrame(csv_list) biz_df.columns = biz_df.iloc[0] # assign column number as name if missing biz_df.columns = [ 'col' + str(i) if j is None else j for i, j in enumerate(biz_df.columns) ] biz_df = biz_df[1:]
A biblioteca Lib/csv.py tem as funções: 1. csv.reader; 2. csv.writer. Parâmetros necessários para a leitura e escrita de arquivos CSV usando a Lib/csv.py: a. Delimiter - Caractere utilizado para separar campos; b. Quotechar - Caractere usado para campos que contém caracteres especiais; c. Quoting - Controla quando as cotações devem ser geradas pelo escritor e reconhecidas pelo leitor; d. newline = '' ''' #Escrevendo em um arquivo CSV import csv print('Biblioteca importada.') with open('planilha.csv', 'w', newline='') as csvfile: spamwriter = csv.writer(csvfile, delimiter=' ', quotechar='|', quoting=csv.QUOTE_MINIMAL) spamwriter.writerow(['Spam'] * 5 + ['Texto Adicional']) spamwriter.writerow(['linha1', 'linha2', 'linha3']) with open('planilha.csv', 'r', newline='') as arquivo: spamreader = csv.reader(arquivo, delimiter=' ', quotechar='|') print('Tipo de spamreader: ', type(spamreader)) for linha in spamreader: print('Tipo de linha: ', type(linha)) print(', '.join(linha))