def _write_link_columns(source_path, target_path, link_ids):
    """Copy the ``link_ids`` columns of a CSV file into a JSON file.

    ``link_ids`` are 1-based column numbers.  Empty cells become NaN, every
    other cell is converted with ``float``.  The number of lines read is
    printed once the source file has been consumed.
    """
    from csv import reader as csvreader
    from json import dump
    from numpy import nan

    rows = []
    # The csv module needs a text-mode handle opened with newline='' on
    # Python 3; the former 'rb' mode only worked on Python 2.
    with open(source_path, newline='') as readfile:
        reader = csvreader(readfile)
        for line in reader:
            # Build real lists: a lazy ``map`` object is not JSON-serialisable.
            rows.append([float(line[i - 1]) if line[i - 1] else nan
                         for i in link_ids])
        print(reader.line_num)
    with open(target_path, 'w') as writefile:
        dump(rows, writefile)


def write_full_link_data():
    """Write the trip and travel-time series of the "full" links as JSON.

    Reads the link ids from ``filenames['full_link_ids']`` and extracts those
    columns from ``filenames['data_trips']`` and
    ``filenames['data_traveltimes']`` into ``filenames['full_link_trips']``
    and ``filenames['full_link_traveltimes']`` respectively.

    Returns:
        None
    """
    from numpy import loadtxt

    # ndmin=1 keeps a file holding a single id iterable.
    full_link_ids = loadtxt(filenames['full_link_ids'], dtype='int', ndmin=1)
    _write_link_columns(filenames['data_trips'],
                        filenames['full_link_trips'], full_link_ids)
    _write_link_columns(filenames['data_traveltimes'],
                        filenames['full_link_traveltimes'], full_link_ids)
    return None
def get_sheet_configuration(self, sheet_name):
    """Return the configuration cells from the first row of a sheet.

    The sheet is read from ``<self.input_name>/<sheet_name>.csv``.  When its
    first row starts with a ``#`` cell, the remaining cells of that row are
    returned; otherwise (including an empty file) an empty list is returned.
    """
    path = os.path.join(self.input_name, sheet_name + '.csv')
    # Compare the version tuple rather than the version string: as strings,
    # '10.0' > '3' is False.
    if sys.version_info[0] >= 3:
        with open(path, encoding=self.encoding) as main_sheet_file:
            # Default of [] avoids StopIteration on an empty file.
            heading_row = next(csvreader(main_sheet_file), [])
    else:
        # Python 2: csvreader is given the encoding directly, as before.
        with open(path) as main_sheet_file:
            heading_row = next(
                csvreader(main_sheet_file, encoding=self.encoding), [])
    if heading_row and heading_row[0] == '#':
        return heading_row[1:]
    return []
def main(argv):
    """Print each CSV row extended with current, voltage, power and offset.

    Args:
        argv: command-line arguments; ``argv[1]`` is the CSV file whose rows
            hold ``time, channelA, channelB``.

    For every row the channels are converted with ``channelToCurrent`` and
    ``channelToVoltage``, the power is their product, and the time offset is
    measured from the first row's timestamp.  Output goes to stdout as
    comma-separated lines.  Prints a usage message when no file is given.
    """
    if len(argv) < 2:
        print("Usage: {0} <csv file>".format(argv[0]))
        return
    filename = argv[1]
    mintime = None
    # Text mode with newline='' is what the csv module expects on Python 3.
    with open(filename, newline='') as csvfile:
        for line in csvreader(csvfile, delimiter=','):
            time, channelA, channelB = line[0], line[1], line[2]
            current = channelToCurrent(channelA)
            voltage = channelToVoltage(channelB)
            power = current * voltage
            # Test against None: a first timestamp of 0 is a valid base and
            # must not be replaced by later rows.
            if mintime is None:
                mintime = int(time)
            offsettime = int(time) - mintime
            output = [time, channelA, channelB, current, voltage, power,
                      offsettime]
            print(','.join(str(x) for x in output))
def read_file(csvfilename):
    """Parse a trip CSV file into a mapping of RECORD id to ``Trip``.

    Every non-empty row is either a header row (first cell ``RECORD``) or one
    leg of a trip.  A leg is turned into a dict keyed by the most recent
    header and added to the ``Trip`` sharing its ``RECORD`` id; the trip is
    created the first time its id is seen.
    """
    trips = {}
    header = None
    with open(csvfilename) as csvfile:
        for row in csvreader(csvfile):
            if not row:
                continue
            if row[0] == 'RECORD':
                header = row
                continue
            # Any other row is a leg described by the current header.
            leg = dict(zip(header, row))
            record = leg['RECORD']
            if record not in trips:
                trips[record] = Trip(record)
            trips[record].add_leg(leg)
    return trips
def Phase2(link_id_list, trips=TRIPS):
    """Decompose the trend of each requested link against the basis ``W``.

    Args:
        link_id_list: link ids; each id is the 1-based row number of the
            link in ``filenames['data_trips_transpose']``.
        trips: unused here; kept for interface compatibility.

    For every link (in ascending id order) the row is read, empty cells
    become NaN, and links without any data are skipped.  The remaining links
    are plotted, decomposed with ``find_decomposition`` and their error is
    appended to ``./Phase2_results.txt``.  ``Errors.csv`` receives the ids
    and errors of the links that were actually fitted.

    Returns:
        ``(H, red_dots)``: the transposed coefficient array and the last
        plot handle (``None`` when every link was skipped).
    """
    from numpy import array, nan, loadtxt, isnan
    from csv import reader as csvreader

    W = loadtxt(filenames['W_trips'])  # W = 8760 x 50
    print('W read', W.shape)
    HT = []
    E = []
    fitted_ids = []  # ids that produced an entry in E, in the same order
    red_dots = None  # stays None if no link has data
    link_id_list = sorted(link_id_list)
    link_id_old = 0
    with open(filenames['data_trips_transpose'], newline='') as readfile:
        reader = csvreader(readfile)
        for link_id in link_id_list:
            # Advance the reader to the row belonging to link_id.
            for _ in range(link_id_old, link_id):
                trend = next(reader)
            link_id_old = link_id
            trend = [float(entry) if entry else nan for entry in trend]
            if sum(~isnan(trend)) == 0:
                continue
            # trend should have length 8760 with NaNs
            red_dots, = plot(range(1, 49),
                             trend[0 + 24 * 24:48 + 24 * 24], 'ro')
            coeffs, error = find_decomposition(trend, W)
            E.append(error)
            fitted_ids.append(link_id)
            print(link_id, '\t', error)
            with open('./Phase2_results.txt', 'a') as writefile:
                writefile.write(str(link_id) + '\t' + str(error) + '\n')
            HT.append(coeffs)
    with open('Errors.csv', 'w') as Errorfile:
        # Write only the fitted ids so both lines stay column-aligned even
        # when links were skipped.
        Errorfile.write(','.join(map(str, fitted_ids)))
        Errorfile.write('\n')
        Errorfile.write(','.join(map(str, E)))
    return array(HT).T, red_dots  # This is H for link_id_list
def applyTyp(val: str, typ: type) -> object:
    """Convert the raw value ``val`` as described by ``typ``.

    ``typ`` is normalised first: a module is replaced by its ``__name__`` and
    the result is looked up in ``TYPE_ALIASES``.  Then:

    * ``bytes``  -- ``val`` is returned untouched;
    * ``bool``   -- true only when ``val`` equals ``b'True'``;
    * ``str``    -- bytes/bytearray are decoded and one pair of matching
      surrounding quotes (single or double) is removed;
    * ``'json'`` -- ``val`` is parsed with ``json.loads``;
    * ``'csv'``  -- ``val`` is split into lines and parsed with the csv
      reader; a value of length 1 yields ``[]`` and a single parsed row is
      returned unwrapped;
    * anything else -- ``typ(val)``.
    """
    if ismodule(typ):
        typ = typ.__name__
    typ = TYPE_ALIASES.get(typ, typ)

    if typ is bytes:
        return val
    if typ is bool:
        return val == b'True'
    if typ is str:
        text = val.decode() if type(val) in (bytes, bytearray) else val
        if len(text):
            first, last = text[0], text[-1]
            # Strip one pair of matching enclosing quotes.
            if (first == '"' and last == '"') or (first == "'" and last == "'"):
                text = text[1:-1]
        return text
    if typ == 'json':
        return json.loads(val)
    if typ == 'csv':
        text = val.decode() if type(val) is bytes else val
        if len(text) == 1:
            return []  # val is empty
        parsed = list(csvreader(text.split('\n')))
        return parsed[0] if len(parsed) == 1 else parsed
    return typ(val)
def get_dataset_csv(csv_to_path, casc_path, min_face_dim=(200, 200)):
    """Build a face training set from a ``path|label`` CSV file.

    Each row must have exactly two ``|``-separated cells: an image path and
    an integer label.  The image is loaded, converted to grayscale and run
    through a ``FaceDetector``.  When exactly one face is found, the cropped
    face is stored (and briefly displayed) together with its label; otherwise
    a warning is printed to stderr and the row is skipped.

    Returns:
        ``(pictures, labels)`` -- the cropped faces and their labels.

    Raises:
        ParserExecption: when a row does not have exactly two cells.
    """
    detector = FaceDetector(casc_path, min_face_dim=min_face_dim)
    pictures, labels = [], []
    with open(csv_to_path, 'r') as csvfile:
        for row in csvreader(csvfile, delimiter="|"):
            if len(row) != 2:
                raise ParserExecption("Your csv seems to be uncorrect.")
            image_path, label = row
            gray = cvtColor(imread(image_path), COLOR_RGB2GRAY)
            faces = detector.detect(gray)
            if len(faces) != 1:
                print(
                    "Warning: Invalid detection on {}, {} faces detected".
                    format(image_path, len(faces)),
                    file=stderr)
                continue
            x, y, w, h = faces[0]
            pictures.append(gray[y:y + h, x:x + w])
            imshow("Adding faces to traning set...", gray[y:y + h, x:x + w])
            waitKey(50)
            labels.append(int(label))
    destroyAllWindows()
    return pictures, labels
def the_loop(proc, args):
    """Poll worker processes until the total operation count is reached.

    Args:
        proc: iterable of objects whose ``get_nowait()`` returns a CSV text
            chunk, or ``None`` when nothing is available.
        args: mapping with ``'n'`` (number of processes) and ``'o'``
            (operation limit; values <= 0 disable the limit).

    The latest numeric row reported by each process is kept.  After a round
    in which at least one process reported, the fifth column of every stored
    row (0 for rows shorter than five values) is summed, and the function
    returns once that sum exceeds ``args['o']``.  When nothing was received
    the loop sleeps for 0.6 s.
    """
    from time import sleep
    from io import StringIO
    from csv import reader as csvreader

    # Real lists (not lazy ``map`` objects) so rows can be indexed and
    # measured on Python 3.
    data = [[0, 0, 0, 0] for _ in range(args['n'])]
    while True:
        got_something = False
        for i, p in enumerate(proc):
            chunk = p.get_nowait()
            if chunk is not None:
                for row in csvreader(StringIO(chunk), delimiter=','):
                    data[i] = [float(s) for s in row]
                got_something = True
        if not got_something:
            sleep(0.6)
        elif args['o'] > 0:
            total = sum(0 if len(row) < 5 else row[4] for row in data)
            if total > args['o']:
                print('Reached %i operations, exiting...' % args['o'])
                return
def _get_clearpond():
    """Load the CLEARPOND neighbourhood densities into two dictionaries.

    The tab-separated file at ``settings.CLEARPOND`` is read with the
    ``iso-8859-2`` encoding.  For every row the lowercased first cell is the
    word; column 5 is stored as its orthographic density and column 29 as
    its phonological density (both converted to ``int``).

    Returns
    -------
    dict
        ``{'orthographic': {...}, 'phonological': {...}}``, each mapping
        words to their density.

    Raises
    ------
    Exception
        If a word occurs more than once in the file.
    """
    logger.debug('Loading Clearpond data')

    orthographic = {}
    phonological = {}
    with open(settings.CLEARPOND, encoding='iso-8859-2') as csvfile:
        for row in csvreader(csvfile, delimiter='\t'):
            word = row[0].lower()
            # Refuse duplicates rather than silently overwriting an entry.
            if word in phonological:
                raise Exception("'{}' is already is Clearpond phonological "
                                'dictionary'.format(word))
            if word in orthographic:
                raise Exception("'{}' is already is Clearpond orthographic "
                                'dictionary'.format(word))
            orthographic[word] = int(row[5])
            phonological[word] = int(row[29])

    return {'orthographic': orthographic, 'phonological': phonological}
def the_loop(proc, args):
    """Collect CSV reports from worker processes until a limit is exceeded.

    Args:
        proc: iterable of objects exposing ``get_nowait()``, which returns a
            CSV text chunk or ``None`` when no report is pending.
        args: mapping holding ``'n'`` (process count) and ``'o'`` (operation
            limit; the limit is ignored when it is not positive).

    Each process slot keeps the last numeric row it reported.  Whenever a
    polling round received data, the fifth value of every slot (0 when the
    slot holds fewer than five values) is summed and the function returns as
    soon as the sum is greater than ``args['o']``.  An idle round sleeps for
    0.6 s.
    """
    from time import sleep
    from io import StringIO
    from csv import reader as csvreader

    # Materialise lists: Python 3 ``map`` objects support neither ``len``
    # nor indexing.
    data = [[0, 0, 0, 0] for _ in range(args['n'])]
    while True:
        got_something = False
        for slot, p in enumerate(proc):
            report = p.get_nowait()
            if report is None:
                continue
            for row in csvreader(StringIO(report), delimiter=','):
                data[slot] = [float(s) for s in row]
            got_something = True
        if not got_something:
            sleep(0.6)
            continue
        if args['o'] > 0:
            done = sum(0 if len(row) < 5 else row[4] for row in data)
            if done > args['o']:
                print('Reached %i operations, exiting...' % args['o'])
                return
def applyTyp(val: str, typ: type) -> object:
    """Apply the conversion named by ``typ`` to the raw value ``val``.

    ``typ`` is first resolved through ``TYPE_ALIASES``.  ``bytes`` returns
    ``val`` unchanged; ``bool`` is true only for ``b'True'`` (``bool(val)``
    would be true for any non-empty value); ``str`` decodes bytes/bytearray
    and removes one pair of matching enclosing quotes; ``'json'`` uses
    ``json.loads``; ``'csv'`` parses the newline-separated text with the csv
    reader (a value of length 1 gives ``[]``, a single row is returned
    unwrapped).  Any other ``typ`` is called as ``typ(val)``.
    """
    typ = TYPE_ALIASES.get(typ, typ)

    if typ is bytes:
        return val

    if typ is bool:
        return val == b'True'

    if typ is str:
        if type(val) in (bytes, bytearray):
            val = val.decode()
        quoted = len(val) and (
            (val[0] == '"' and val[-1] == '"')
            or (val[0] == "'" and val[-1] == "'"))
        if quoted:
            val = val[1:-1]
        return val

    if typ == 'json':
        return json.loads(val)

    if typ == 'csv':
        if type(val) is bytes:
            val = val.decode()
        if len(val) == 1:
            return []  # val is empty
        rows = list(csvreader(val.split('\n')))
        if len(rows) == 1:
            return rows[0]
        return rows

    return typ(val)
def find_catids_early(datafile_path):
    """Scan the CSV file to learn what cat IDs it contains.

    The second column of every row is matched (case-insensitively) against
    ``M<digits>`` / ``F<digits>``.  Rows without a second column are ignored.

    Returns:
        The distinct cat IDs in order of first appearance, or ``False`` when
        the file contains none.
    """
    # Raw string: '\d' in a normal literal is an invalid escape sequence.
    catid_regex = r'^(M|F)[\d]+$'
    seen = set()
    catids = []
    # Text mode with newline='' -- csv.reader cannot parse a binary handle
    # on Python 3.
    with open(datafile_path, newline='') as datafile:
        for csvrow in csvreader(datafile):
            try:
                candidate = csvrow[1]
            except IndexError:
                continue
            if candidate in seen:
                continue
            if re.match(catid_regex, candidate, re.I):
                catids.append(candidate)
                seen.add(candidate)
    if catids:
        return catids
    return False
def get_data():
    """Ask the user for the openPO report and return its rows.

    A file dialog is shown; when a file is chosen it is passed to
    ``copy_file`` and then read as tab-separated text.

    Returns:
        A list of rows (each a list of cells), or ``None`` when the dialog
        was dismissed without a selection.
    """
    chosen = filedialog.askopenfilename(
        title="Location of openPO",
        initialdir=r"%USER%\Desktop",
        filetypes=[
            ("Plain Text", "*.txt"),
            ("CSV", "*.csv"),
        ],
    )
    if not chosen:
        return None
    copy_file(chosen)
    with open(chosen, "r") as csvfile:
        return list(csvreader(csvfile, delimiter="\t"))
def testversion(self): """открыть файл на чтение и найти там строку ;FILE_FORMAT=1, ;FILE_FORMAT=2 если ни одна строка не будет найдена, проверить следующие три предположения 1. может быть файл пуст 2. в файле нет программы 3. файл содержит некорректный формат """ try: self.format = None file = open(self.path2prg, 'r') for line in file: if ";FILE_FORMAT=1" in line: self.format = "v1" elif ";FILE_FORMAT=2" in line: self.format = "v2" file.close() if self.format==None: #возможно, это prg v1 без заголовка reader = csvreader(open(self.path2prg, 'r'), delimiter=" ", skipinitialspace=True) row = reader.__next__() if (len(row)>=5): print("prg v1?") self.format="v1" except OSError as err: print(err) return self.format
def find_catids_early(datafile_path):
    """Scan the CSV file to learn what cat IDs it contains.

    The column with index ``cfg_data_column_catid`` is matched
    (case-insensitively) against ``M<digits>`` / ``F<digits>``.  Rows that
    are too short to have that column are skipped.

    Returns:
        The distinct cat IDs in order of first appearance, or ``False`` when
        none were found.
    """
    # Raw string: '\d' in a normal literal is an invalid escape sequence.
    catid_regex = r'^(M|F)[\d]+$'
    column = int(cfg_data_column_catid)
    seen = set()
    catids = []
    with open(datafile_path, 'rt') as datafile:
        for csvrow in csvreader(datafile):
            # The lookup is what can raise IndexError, so it has to sit
            # inside the try block for short rows to be skipped.
            try:
                temp_catid = csvrow[column]
            except IndexError:
                continue
            if temp_catid in seen:
                continue
            if re.match(catid_regex, temp_catid, re.I):
                catids.append(temp_catid)
                seen.add(temp_catid)
    if catids:
        return catids
    return False
def ReadObstaclesGeomsFile(csv_file):
    """Read obstacle outlines from a CSV file.

    The first row is skipped as a header.  Each following row supplies, in
    its first four cells, a name, a corner number (unused) and the ``x`` /
    ``y`` coordinates of one corner.  Corners are collected per name in file
    order, and the first corner is repeated at the end of each outline.

    Returns:
        ``{name: {'vertices': array, 'polygon': Polygon}}``.
    """
    mach_geoms = dict()
    with open(csv_file, 'r') as csvfile:
        rows = csvreader(csvfile, delimiter=',')
        next(rows)  # header row
        for row in rows:
            mach_name, _corner_num, x, y = row[:4]
            entry = mach_geoms.setdefault(mach_name, {'vertices': []})
            entry['vertices'].append([float(x), float(y)])

    for entry in mach_geoms.values():
        corners = entry['vertices']
        corners.append(corners[0])  # repeat the first point at the end
        entry['vertices'] = array(corners)
        entry['polygon'] = Polygon(entry['vertices'])
    return mach_geoms
def scheduleParser(self):
    """Parse the schedule CSV into a per-id list of shifts.

    The file path comes from ``self.ui.fileLocation_2.text()``; its first
    row is skipped.  For each remaining row, column 0 is kept as is, columns
    1 and 2 are parsed as times and reduced to seconds since midnight (hours
    and minutes only), and columns 3 and 4 are split on commas.

    Returns:
        ``{id: [(column0, start, end, day), ...]}`` with one entry for every
        combination of a column-3 id and a column-4 day.
    """
    def seconds_of_day(text):
        moment = parser.parse(text)
        return (moment.hour * 3600) + (moment.minute * 60)

    shifts = []
    with open(self.ui.fileLocation_2.text(), "r") as f:
        rows = csvreader(f)
        next(rows)
        for row in rows:
            shifts.append((row[0],
                           seconds_of_day(row[1]),
                           seconds_of_day(row[2]),
                           row[3].split(","),
                           row[4].split(",")))

    by_id = {}
    for label, start, end, ids, days in shifts:
        for pid in ids:
            slots = by_id.setdefault(str(pid), [])
            slots.extend((label, start, end, day) for day in days)
    return by_id
def get_mp_data(path="antimp.csv"):
    """Return the rows of a CSV file, each extended with a trailing ``0``.

    Args:
        path: file to read; defaults to ``antimp.csv`` in the current
            working directory, which was previously hard-coded.

    Returns:
        A list of rows; every row is its list of cells followed by the
        integer ``0``.
    """
    with open(path) as fp:
        rows = list(csvreader(fp))
    for row in rows:
        row.append(0)
    return rows
def testversion(self): """открыть файл на чтение и найти там строку ;FILE_FORMAT=1, ;FILE_FORMAT=2 если ни одна строка не будет найдена, проверить следующие три предположения 1. может быть файл пуст 2. в файле нет программы 3. файл содержит некорректный формат """ try: self.format = None file = open(self.path2prg, 'r') for line in file: if ";FILE_FORMAT=1" in line: self.format = "v1" elif ";FILE_FORMAT=2" in line: self.format = "v2" file.close() if self.format == None: #возможно, это prg v1 без заголовка reader = csvreader(open(self.path2prg, 'r'), delimiter=" ", skipinitialspace=True) row = reader.__next__() if (len(row) >= 5): print("prg v1?") self.format = "v1" except OSError as err: print(err) return self.format
def read_cac_scores(filename):
    """Map the second column of a CSV file to its fourth-from-last column.

    The first row is treated as a header and skipped.  Keys are the stripped
    second cell of each row; values are the stripped fourth-from-last cell
    converted to ``float``.  A later row with the same key replaces the
    earlier one.
    """
    with open(filename) as csvfile:
        rows = csvreader(csvfile)
        next(rows, None)  # skip first row = header
        return {row[1].strip(): float(row[-4].strip()) for row in rows}
def nameFromSymbol(symbol):
    """Look up ``symbol`` in ``data/fullCompanyList.csv``.

    Returns:
        The second cell of the first row whose first cell equals ``symbol``,
        or ``None`` when no row matches.
    """
    FILE_PATH = "data/fullCompanyList.csv"
    with open(FILE_PATH, "r") as f:
        matches = (company[1] for company in csvreader(f)
                   if company[0] == symbol)
        return next(matches, None)
def load_element_data():
    """Load ``data/element.csv`` (tab-separated) below ``ROOT_DIR``.

    Returns:
        A list of rows, each a list of the row's cell strings.
    """
    # Text mode with newline='': csv.reader cannot iterate a binary handle
    # on Python 3.  The ``with`` block closes the file even if parsing fails.
    with open(ROOT_DIR + 'data/element.csv', newline='') as element_file:
        reader = csvreader(element_file, delimiter='\t')
        return [list(row) for row in reader]
def load_element_data():
    """Read the tab-separated ``data/element.csv`` found under ``ROOT_DIR``.

    Returns:
        A list with one list of cell strings per row of the file.
    """
    # The context manager guarantees the handle is closed even when the csv
    # parser raises part-way through the file.
    with open(ROOT_DIR + 'data/element.csv', 'r') as element_file:
        return [list(row) for row in csvreader(element_file, delimiter='\t')]
def get_sheet_configuration(self, sheet_name):
    """Return the configuration cells from the first row of a sheet.

    Reads ``<self.input_name>/<sheet_name>.csv`` using ``self.encoding``.
    When the first row begins with a ``#`` cell, the rest of that row is
    returned; otherwise, or when the file is empty, ``[]`` is returned.
    """
    path = os.path.join(self.input_name, sheet_name + ".csv")
    with open(path, encoding=self.encoding) as main_sheet_file:
        # Default of [] so an empty sheet does not raise StopIteration.
        heading_row = next(csvreader(main_sheet_file), [])
    if heading_row and heading_row[0] == "#":
        return heading_row[1:]
    return []
def f_training_data(self):
    """Build shuffled training data from ``data/training_tennis.csv``.

    The file (latin-1) holds the sentiment in column 0 and the tweet text in
    column 1.  The total number of rows is printed, then at most the first
    800 rows are processed with ``self.f_process_tweets``.

    Returns:
        A randomly shuffled list of ``(processed_tweet, sentiment)`` tuples.
    """
    training_path = 'data/training_tennis.csv'
    max_rows = 800

    # Count the rows; the with-block closes the handle the count needed.
    with open(training_path, encoding='latin-1') as handle:
        length = sum(1 for _ in csvreader(handle))
    print(length)

    sentiment = []
    tweets = []
    with open(training_path, encoding='latin-1') as handle:
        for count, row in enumerate(csvreader(handle, delimiter=",")):
            if count == max_rows:
                break
            tweets.append(row[1])
            sentiment.append(row[0])

    processed_tweets = self.f_process_tweets(tweets)
    # [([words], sentiment), ([words], sentiment), ...]
    training_data = list(zip(processed_tweets, sentiment))
    random.shuffle(training_data)
    return training_data
def test_group_sample_ids(self):
    """Rows of the sample WQP response are grouped under a single sample id.

    The fixture ``tests/data/WQP/webservice.csv.as.txt`` is expected to
    yield exactly one group, ``nwisaz.01.00000154``, holding three rows.
    """
    expected_id = 'nwisaz.01.00000154'
    sample_response = join('tests', 'data', 'WQP', 'webservice.csv.as.txt')
    with open(sample_response, 'r') as f:
        unique_sample_ids = self.patient._group_rows_by_id(csvreader(f))
        self.assertEqual(len(list(unique_sample_ids.keys())), 1)
        self.assertEqual(len(unique_sample_ids[expected_id]), 3)
        self.assertTrue(expected_id in unique_sample_ids)
def csv_to_list(csv_file, delimiter=','):
    """Read a CSV file into a list of rows.

    Args:
        csv_file: path of the file to read.
        delimiter: cell separator, ``','`` by default.

    Returns:
        A list in which every row is a sublist and every element of a
        sublist is one cell of the table.
    """
    # Text mode with newline='' is required by csv.reader on Python 3; a
    # binary handle makes the reader raise on the first row.
    with open(csv_file, newline='') as csv_con:
        return list(csvreader(csv_con, delimiter=delimiter))
def read_input(inputfile):
    '''
    Load a numeric CSV file as a transposed NumPy array.

    The last column of every row is assumed to hold the classification and
    is dropped; all remaining cells are converted to float.  The result has
    one row per input column.
    '''
    with open(inputfile) as f:
        records = csvreader(f, delimiter=',', quotechar='"')
        values = [[float(cell) for cell in record[:-1]] for record in records]
    return np.array(values).T
def f_testing_data(self):
    """Build testing data from the tail of ``data/training_tennis.csv``.

    The file (latin-1) holds the sentiment in column 0 and the tweet text in
    column 1.  ``length`` is the total row count minus 800 and is printed;
    rows whose reader line number exceeds ``length`` are collected, up to
    100 of them, and processed with ``self.f_process_tweets``.

    Returns:
        A list of ``(processed_tweet, sentiment)`` tuples in file order.
    """
    data_path = 'data/training_tennis.csv'
    max_rows = 100

    # Count the rows; the with-block closes the handle the count needed.
    with open(data_path, encoding='latin-1') as handle:
        length = sum(1 for _ in csvreader(handle)) - 800
    print(length)

    sentiment = []
    tweets = []
    with open(data_path, encoding='latin-1') as handle:
        raw_data = csvreader(handle, delimiter=",")
        for row in raw_data:
            if raw_data.line_num <= length:
                continue
            if len(tweets) == max_rows:
                break
            tweets.append(row[1])
            sentiment.append(row[0])

    processed_tweets = self.f_process_tweets(tweets)
    # [([words], sentiment), ([words], sentiment), ...]
    return list(zip(processed_tweets, sentiment))
def scheduleChecker(self):
    """Validate the schedule CSV and return a description of the errors.

    The file path comes from ``self.ui.fileLocation_2.text()``; the first
    row is skipped.  For every following row, columns 1-5 are checked with
    ``verifyShiftID``, ``verifyStartTime``, ``verifyEndTime``,
    ``verifyPorterID`` and ``verifyDays``; each verifier returns its own
    error text (empty when the cell is fine).  A verifier that raises, or a
    missing cell, is reported as a syntax error for that cell.  Checking
    stops after the first row that produced any error.

    Returns:
        ``""`` when the file is valid, otherwise the collected messages
        prefixed with a ``*****SCHEDULE DATA FILE ERROR*****`` line.
    """
    # One verifier per column, in column order.
    verifiers = (
        self.verifyShiftID,
        self.verifyStartTime,
        self.verifyEndTime,
        self.verifyPorterID,
        self.verifyDays,
    )
    strError = ""
    with open(self.ui.fileLocation_2.text(), "r") as f:
        reader = csvreader(f)
        next(reader, None)  # skip the header; tolerate an empty file
        # Data rows start on line 2 of the file.
        for row_number, row in enumerate(reader, start=2):
            for column, verify in enumerate(verifiers, start=1):
                cell = "Row: " + str(row_number) + " Column: " + str(column)
                try:
                    strError = strError + verify(row[column - 1], cell)
                except Exception:
                    # Exception rather than a bare except, so that
                    # KeyboardInterrupt/SystemExit are not swallowed.
                    strError = (strError
                                + "Incorrect Syntax in Schedule File. Check "
                                + cell + "\n")
            if strError != "":
                break
    if strError == "":
        return strError
    return "*****SCHEDULE DATA FILE ERROR*****\n" + strError
def read_input(infile):
    '''
    Split a CSV file into a feature array and a label array.

    Blank lines are ignored.  For every other row, all cells but the last
    are converted to float and the last cell is kept as the label.  Returns
    the transposed feature array and the array of labels.
    '''
    features, labels = [], []
    with open(infile) as f:
        records = csvreader(f, delimiter=',', quotechar='"')
        for *values, label in filter(None, records):
            features.append([float(value) for value in values])
            labels.append(label)
    return np.array(features).T, np.array(labels)
def read_uikparams():
    """Yield ``(uik, params)`` pairs from ``uikparams.csv``.

    The first row is skipped as a header.  The first 15 cells of each row
    are mapped onto the fields ``tik, raion, uik, ... phone_k``.  ``params``
    is the row as a dict in which ``koib`` is replaced by ``'KOIB'`` when
    the cell is not ``'0'`` and by ``''`` otherwise.

    Rows with too few cells are printed and skipped.
    """
    Row = namedtuple('row', 'tik, raion, uik, voters, mn_in, mn_out, koib, addr_vote, place, doma, phone, url, uikpage, addr_komissii, phone_k')
    with open('uikparams.csv') as source:
        data = csvreader(source, delimiter=',')
        next(data, None)  # skip the headers
        for row in data:
            try:
                x = Row(*row[:len(Row._fields)])
            except TypeError:
                # Too few cells.  Skip the row: falling through would reuse
                # the previous row's ``x`` (or fail if there is none yet).
                print(row)
                continue
            yield x.uik, dict(x._asdict(),
                              koib='KOIB' if x.koib != '0' else '')
def loadcsv(filename):
    """Load a purely numeric CSV file as a list of float rows.

    Returns:
        A list of rows, each a list of floats.  When the file cannot be
        read or parsed, ``main()`` is called instead and ``None`` is
        returned.
    """
    try:
        with open(filename, 'r') as fp:
            return [[float(x) for x in row] for row in csvreader(fp)]
    except Exception:
        # Exception rather than a bare except, so Ctrl-C and SystemExit are
        # not turned into another call of main().
        main()
def doodle(doodle):
    """Parse a Doodle CSV export into a ``Day`` with its ``Training`` slots.

    Each csv row is converted to its ``str()`` form and picked apart by
    position:

    * row 1 -- the doodle name, used to create the ``Day``;
    * row 4 -- ``;``-separated dates; an empty cell repeats the previous one;
    * row 5 -- ``;``-separated days, combined with the row-4 dates into
      ``Training`` objects appended to ``Day.trainings``;
    * rows 6+ -- one participant per row until a row starting with
      ``Count``.  The text before the first space lists the participant's
      disciplines (``/``-separated); the following ``;``-separated cells are
      the wishes per training.  ``OK`` appends the name to the training
      attribute named after the upper-cased discipline; ``(OK)`` appends
      the disciplines and the name to the ``CAN`` attribute.

    Returns:
        The populated ``Day`` object.
    """
    name = ""
    tmpdates = list()
    with open(doodle, 'r') as csvdoodle:
        rownum = 1
        for record in csvreader(csvdoodle):
            row = str(record)
            if rownum == 1:
                # read in name of doodle
                name = row.split(";")[0][8:-1]
                Trainingsday = Day(name)
            elif rownum == 4:
                # read in dates of that doodle
                last = ""
                for training in row[3:-2].split(";"):
                    if training != "":
                        training = training.split(" ")[0]
                    else:
                        training = last
                    tmpdates.append(training)
                    last = training
            elif rownum == 5:
                days = row[3:-2].split(";")
                for position in range(len(days)):
                    month = tmpdates[position]
                    Trainingsday.trainings.append(
                        Training(str(days[position] + "." + month)))
            elif rownum >= 6:
                # read in TL and their preferences
                if row[2:7] == 'Count':
                    break
                discpart = row[2:-2].split(" ")[0].replace(':', '')
                TLname = row[(3 + len(discpart)):-2].split(";")[0]
                TLdiscs = discpart.split("/")
                if len(TLdiscs) > 1:
                    TLname = TLname + "*"
                cells = row[2:-2].split(";")
                for i in range(len(Trainingsday.trainings)):
                    wish = cells[i + 1]
                    if wish == "":
                        continue
                    slots = Trainingsday.trainings[i].__dict__.items()
                    for attr, value in slots:
                        for dis in TLdiscs:
                            dis = dis.upper()
                            if attr == dis and wish == "OK":
                                value.append(TLname + ",\n")
                            elif attr == 'CAN' and wish == "(OK)":
                                disz = "".join(TLdiscs)
                                value.append(
                                    (disz + " " + str(TLname) + ",\n"))
            rownum += 1
    return Trainingsday
def load_raw_data_from_csv(filename):
    """Return the data rows of a tab-separated price file under ``data/``.

    ``filename`` may be given with or without the leading ``data/``.  The
    first row is a header and is skipped; every other row is returned as a
    list of cell strings (date, open, high, low, close, volume, market cap).
    """
    if filename[:5] == 'data/':
        path = filename
    else:
        path = 'data/{}'.format(filename)
    with open(path, 'r') as handle:
        rows = csvreader(handle, delimiter='\t')
        next(rows, None)  # header
        return list(rows)
def get_csv(url):
    """Fetch ``url`` and return a csv reader over the response text.

    Raises whatever ``response.raise_for_status()`` raises for a failed
    request.  The elapsed time and the ``total-site-count`` /
    ``total-result-count`` headers are printed on a best-effort basis: any
    failure while printing them is ignored.
    """
    response = get(url)
    response.raise_for_status()
    counters = (
        ('sites', 'total-site-count'),
        ('results', 'total-result-count'),
    )
    try:
        print('query completed in {}'.format(response.elapsed))
        for label, header in counters:
            print('new {} found {}'.format(label, response.headers[header]))
    except:
        pass
    return csvreader(response.text.splitlines())
def main(argv):
    """Print sliding-window medians of current, voltage and time offset.

    Args:
        argv: command-line arguments; ``argv[1]`` is the CSV file and
            ``argv[2]`` the window size.

    The input must already have seven columns per row (a message is printed
    and processing stops otherwise).  Only columns 3 (current), 4 (voltage)
    and 6 (time offset) are parsed.  Once ``window size`` samples have been
    collected, each further row prints
    ``time_median,current_median,voltage_median,power`` for the current
    window and then drops the oldest sample.  ``power`` is the product of
    the current and voltage medians.
    """
    if len(argv) < 3:
        print('Usage: {0} <filename> <window size>'.format(argv[0]))
        return
    filename = argv[1]
    window_size = int(argv[2])
    current_win = []
    voltage_win = []
    time_win = []
    # The with-block also closes the file on the early return below.
    with open(filename, newline='') as csvfile:
        for line in csvreader(csvfile, delimiter=','):
            if len(line) < 7:
                print('use add_power.py before')
                return
            current = float(line[3])
            voltage = float(line[4])
            offsettime = int(line[6])
            if len(current_win) >= window_size:
                current_med = median(current_win)
                voltage_med = median(voltage_win)
                time_med = median(time_win)
                power_med = current_med * voltage_med
                output = [time_med, current_med, voltage_med, power_med]
                print(','.join(str(x) for x in output))
                # Slide the window: forget the oldest sample.
                current_win = current_win[1:]
                voltage_win = voltage_win[1:]
                time_win = time_win[1:]
            current_win.append(current)
            voltage_win.append(voltage)
            time_win.append(offsettime)
def f_gar_sentiment(self, training_featured_words):
    """Classify the lines of ``data/garbi_tweets.csv`` and tally the labels.

    The number of csv rows is printed first.  Then at most the first 500
    lines of the file are processed with ``self.f_process_tweets``; lines
    whose processed form is empty are dropped.  The remaining ones are
    turned into feature sets with ``self.f_feature_set`` and classified with
    ``self.voted_classifier``.

    Results are stored on ``self`` as ``gloop_count`` (items classified),
    ``gpos``, ``gneg`` and ``gneut`` and finally printed.
    """
    tweets_path = 'data/garbi_tweets.csv'
    max_tweets = 500

    # Count the rows; the with-block closes the handle the count needed.
    with open(tweets_path, encoding='latin-1') as handle:
        length = sum(1 for _ in csvreader(handle))
    print(length)

    # NOTE(review): this read uses the default encoding while the count
    # above uses latin-1 -- confirm which one is intended.
    with open(tweets_path) as handle:
        gtweets = handle.read().split('\n')[:max_tweets]

    gar_tweets = self.f_process_tweets(gtweets)
    # Drop empty results; the sentiment is unknown, hence 'None'.
    gar_data = [(tweet, 'None') for tweet in gar_tweets if tweet]
    # The result is not used here; the call is kept in case the helpers
    # update state on ``self``.
    self.f_feature_word(self.f_specific_all_words(gar_tweets))
    gar_set = self.f_feature_set(gar_data, training_featured_words)

    self.gloop_count = 0
    self.gpos = 0
    self.gneg = 0
    self.gneut = 0
    for index in range(len(gar_set)):
        self.gloop_count += 1
        # Classify once per item instead of once per comparison.
        label = self.voted_classifier.f_classify(gar_set[index][0])
        if label == 'positive':
            self.gpos += 1
        elif label == 'negative':
            self.gneg += 1
        elif label == 'neutral':
            self.gneut += 1
    print(self.gloop_count, self.gpos, self.gneg, self.gneut)
def f_novak_sentiment(self, training_featured_words):
    """Classify the lines of ``data/novak_tweets.csv`` and tally the labels.

    The number of csv rows is printed first.  Then at most the first 1000
    lines of the file are processed with ``self.f_process_tweets``; lines
    whose processed form is empty are dropped.  The remaining ones are
    turned into feature sets with ``self.f_feature_set`` and classified with
    ``self.voted_classifier``.

    Results are stored on ``self`` as ``nloop_count`` (items classified),
    ``npos``, ``nneg`` and ``nneut`` and finally printed.
    """
    tweets_path = 'data/novak_tweets.csv'
    max_tweets = 1000

    # Count the rows; the with-block closes the handle the count needed.
    with open(tweets_path, encoding='latin-1') as handle:
        length = sum(1 for _ in csvreader(handle))
    print(length)

    # NOTE(review): this read uses the default encoding while the count
    # above uses latin-1 -- confirm which one is intended.
    with open(tweets_path) as handle:
        ntweets = handle.read().split('\n')[:max_tweets]

    novak_tweets = self.f_process_tweets(ntweets)
    # Drop empty results; the sentiment is unknown, hence 'None'.
    novak_data = [(tweet, 'None') for tweet in novak_tweets if tweet]
    # The result is not used here; the call is kept in case the helpers
    # update state on ``self``.
    self.f_feature_word(self.f_specific_all_words(novak_tweets))
    novak_set = self.f_feature_set(novak_data, training_featured_words)

    self.nloop_count = 0
    self.npos = 0
    self.nneg = 0
    self.nneut = 0
    for index in range(len(novak_set)):
        self.nloop_count += 1
        # Classify once per item instead of once per comparison.
        label = self.voted_classifier.f_classify(novak_set[index][0])
        if label == 'positive':
            self.npos += 1
        elif label == 'negative':
            self.nneg += 1
        elif label == 'neutral':
            self.nneut += 1
    print(self.nloop_count, self.npos, self.nneg, self.nneut)
def download_data(link):
    """Download a UTF-8 CSV file and return its rows.

    Args:
        link: URL of the CSV file.

    Lines that start with a comma get a single space prepended (a blank
    placeholder for the missing first cell) so that the leading column is
    not dropped.

    Returns:
        A list of rows, each a list of cell strings.
    """
    # The context manager closes the connection once the body is read.
    with request.urlopen(link) as response:
        lines = [raw.decode('utf-8') for raw in response.readlines()]
    # Add blank space for missing cities to prevent dropping columns
    lines = [" " + line if line.startswith(",") else line for line in lines]
    # Split each row into a list of data
    return list(csvreader(lines))
def _import(self, f):
    """Add the short URLs listed in the CSV stream ``f`` to the storage.

    Rows with fewer than two columns (which includes empty rows) are
    skipped.  For every other row, the first cell must match ``SHORTURLRE``
    and is stored together with the second cell.

    Returns:
        ``None`` on success, or a translated error message as soon as a
        first cell fails the ``SHORTURLRE`` check.  Rows processed before
        that point have already been stored.
    """
    storage = queryUtility(IShortURLStorage)
    for row in csvreader(f):
        if len(row) < 2:
            continue
        short, target = row[0], row[1]
        if SHORTURLRE.match(short) is None:
            # Ignore funny characters
            return _(u'Your upload contains invalid characters.')
        storage.add(short, target)
    return None
def open_csv(bus, fn):
    """Create a D-Bus service tree from a CSV description file.

    Args:
        bus: the D-Bus connection the bus name is claimed on.
        fn: path of the CSV file.

    The first cell of the first row is the bus name.  Every following row
    provides ``path, type, value`` in its first three cells; a blank value
    is replaced by an empty ``dbus.Array`` with signature ``u``.

    Returns:
        A ``DbusRootObject`` holding one ``DbusPathObject`` per path.
    """
    service = {}
    # Text mode with newline='' and next(): the 'rb' mode and
    # ``reader.next()`` used previously only work on Python 2.
    with open(fn, newline='') as csvfile:
        reader = csvreader(csvfile)
        name = next(reader)[0]
        busName = dbus.service.BusName(name, bus)
        for row in reader:
            path, typ, value = row[:3]
            if not value.strip():
                # Invalid
                value = dbus.Array([], signature=dbus.Signature('u'),
                                   variant_level=1)
            service[path] = DbusPathObject(busName, path, wrap(typ, value))
    return DbusRootObject(busName, service)
def load_element_data():
    """Load ``data/element.csv`` from the ``mathics_scanner`` package.

    Returns:
        A list of rows (lists of cell strings) read from the tab-separated
        file, or ``None`` when ``mathics_scanner`` or the data file cannot
        be found; a message is printed in that case.
    """
    try:
        import mathics_scanner
    except ImportError:
        # Reported separately: no data directory is known at this point, so
        # the file path cannot be part of the message.
        print("mathics_scanner", " not found.")
        return None

    # dirname() instead of slicing 11 characters off ``__file__``, which is
    # only right when the file name is exactly ``__init__.py``.
    datadir = os.path.dirname(mathics_scanner.__file__)
    element_path = os.path.join(datadir, 'data/element.csv')
    try:
        element_file = open(element_path, 'r')
    except OSError:
        print(element_path, " not found.")
        return None

    with element_file:
        reader = csvreader(element_file, delimiter='\t')
        return [list(row) for row in reader]
def get_type_parts(part_type):
    """
    Collect the part numbers listed for one part type.

    :param part_type: String indicating 'HDD', 'MEM', or 'CPU'
    :return: First cell of every row of ``parts_in_sp/all<part_type>.csv``
    """
    csv_path = pathjoin("parts_in_sp", "all" + part_type + ".csv")
    with open(csv_path, "r") as csvfile:
        return [row[0] for row in csvreader(csvfile)]
def get_sheet_headings(self, sheet_name):
    """Return the heading row of ``<self.input_name>/<sheet_name>.csv``.

    The sheet's own configuration is used when it has one, otherwise
    ``self.base_configuration``; with ``self.use_configuration`` false no
    configuration applies.  The heading row is the row after the
    configuration line (present only when the sheet has its own
    configuration) and after ``skipRows`` further rows.

    Returns:
        The heading row as a list of cells; ``[]`` when the configuration
        marks the sheet as ignored; ``None`` when the file has no such row.
    """
    sheet_configuration = self.sheet_configuration[self.sheet_names_map[sheet_name]]
    configuration_line = 1 if sheet_configuration else 0
    if not sheet_configuration:
        sheet_configuration = self.base_configuration
    if not self.use_configuration:
        sheet_configuration = {}
    skip_rows = sheet_configuration.get("skipRows", 0)
    if sheet_configuration.get("ignore"):
        # returning empty headers is a proxy for no data in the sheet.
        return []

    heading_index = skip_rows + configuration_line

    def pick_heading(rows):
        # Return the row at heading_index, or None if the file is shorter.
        for num, row in enumerate(rows):
            if num == heading_index:
                return row
        return None

    path = os.path.join(self.input_name, sheet_name + '.csv')
    # Compare the version tuple rather than the version string: as strings,
    # '10.0' > '3' is False.
    if sys.version_info[0] >= 3:
        with open(path, encoding=self.encoding) as main_sheet_file:
            return pick_heading(csvreader(main_sheet_file))
    # Python 2: csvreader is given the encoding directly, as before.
    with open(path) as main_sheet_file:
        return pick_heading(csvreader(main_sheet_file, encoding=self.encoding))
def readCsvFile(self, fileName, delimiter=","):
    """
    Reads csv file and returns a List with each row as an element.

    Empty rows are left out.

    Returns:
        ``(rows, True)`` on success, or ``(error_message, False)`` when the
        file does not exist or cannot be read.
    """
    if not os.path.exists(fileName):
        return "ERROR: File \"%s\" does not exists." % fileName, False
    try:
        # newline="" lets the csv module handle line endings itself; the
        # former "rU" mode is rejected by Python 3.11 and later.
        with open(fileName, "r", newline="") as filehandle:
            reader = csvreader(filehandle, delimiter=delimiter)
            retlist = [row for row in reader if row]
        return retlist, True
    except Exception:
        return "ERROR: %s" % str(format_exc()), False
def _down(self,indelimeter): """скачивает данные в заданном формате""" try: reader = csvreader(open(self.path2prg, 'r'), delimiter=indelimeter, skipinitialspace=True) self.program = list() self.title = list() for row in reader: #команда if (len(row)>=5): line=row[0] if (line[0]!=";"): self.program.append(row) #заголовок или комментарий if len(row)>0 and ";" in row[0]: self.title.append(row) except OSError as err: print(err) self.program = None self.title = None
def test_csv_export(self):
    """The CSV export view produces the expected header and two data rows.

    The view ``@@collective.excelexportcsv`` is rendered on the container,
    the generated file is parsed as ``;``-separated CSV and compared row by
    row, and the file is removed afterwards.
    """
    output = self.portal.container.unrestrictedTraverse('@@collective.excelexportcsv')()
    generated_path = self._get_generated_filepath(output, 'test.csv')
    # Close the file before removing it; next() replaces the Python-2-only
    # ``lines.next()``.
    # NOTE(review): the file is opened with the default encoding -- confirm
    # it matches the export's encoding for the non-ASCII cells below.
    with open(generated_path) as generated:
        lines = csvreader(generated, dialect='excel', delimiter=';')
        headers_row = next(lines)
        self.assertEqual(headers_row,
                         ['Name', 'Biography', 'Birth date', 'subscription',
                          'amount', 'Languages', 'Photo', 'Related Items'])
        row1 = next(lines)
        self.assertEqual(row1,
                         ['John Doe',
                          'Longtemps, je me suis couch\xe9 de bonne heure',
                          '1980/07/24', 'silver', '100',
                          'English\nFran\xe7ais', 'logoplone.png', ''])
        row2 = next(lines)
        self.assertEqual(row2,
                         ['John Smith',
                          "Je forme une entreprise qui n'eut jamais d'exem...",
                          '1981/07/24', '', '100', 'English\nEspa\xf1ol', '',
                          'John Doe'])
    os.remove(generated_path)
def the_loop(proc, args):
    """Poll worker processes, vary their load, and stop at the limit.

    Args:
        proc: iterable of objects with ``get_nowait()`` (returns a CSV text
            chunk or ``None``) and ``set_load(value)``.
        args: object whose attributes ``n``, ``o``, ``min`` and ``max`` are
            sequences; their first element is the process count, the
            operation limit and the two load levels.

    Each process slot keeps the last numeric row it reported.  Roughly every
    five seconds all processes are set to ``min`` or ``max`` load, chosen at
    random.  After a round that received data, the fifth value of every
    slot (0 for slots with fewer than five values) is summed and the
    function returns once the sum exceeds the limit (if the limit is
    positive).  An idle round sleeps for one second.
    """
    from time import sleep, time
    from random import randint
    from io import StringIO
    from csv import reader as csvreader

    data = [[0, 0, 0, 0] for _ in range(args.n[0])]
    t = time()
    while True:
        got_something = False
        for slot, p in enumerate(proc):
            report = p.get_nowait()
            if report is not None:
                for row in csvreader(StringIO(report), delimiter=','):
                    # A real list: Python 3 ``map`` objects support neither
                    # ``len`` nor indexing.
                    data[slot] = [float(s) for s in row]
                got_something = True
        if time() > t + 5:
            t = time()
            load = args.max[0] if randint(0, 1) == 1 else args.min[0]
            for p in proc:
                p.set_load(load)
            print("Set load to ", load)
        if not got_something:
            sleep(1.0)
        elif args.o[0] > 0:
            done = sum(0 if len(row) < 5 else row[4] for row in data)
            if done > args.o[0]:
                print('Reached %i operations, exiting...' % args.o[0])
                return
def insbatchform(request):
    """Handle the batch-import form for insolvency proceedings.

    On a POST whose button is ``load``, the uploaded CSV file is read row by
    row.  Each non-empty row must provide: a description (1-255
    characters), a positive integer number, a year of 2008 or later, and
    ``ano``/``ne`` for the "detailed" flag.  Valid rows are stored with
    ``Insolvency.objects.update_or_create`` for the current user; invalid
    rows are collected as ``(row_number, message)`` pairs.  On success the
    result page with the import count and the errors is rendered.

    In every other case (GET, missing file, unreadable file) the form page
    is rendered, with ``err_message`` set where applicable.
    """
    LOGGER.debug('Proceedings import page accessed using method {}'.format(request.method), request)

    err_message = ''
    uid = request.user.id
    uname = request.user.username
    if request.method == 'POST':
        button = getbutton(request)
        if button == 'load':
            infile = request.FILES.get('load')
            if not infile:
                err_message = 'Nejprve zvolte soubor k načtení'
            else:
                errors = []
                try:
                    count = 0
                    with infile:
                        rows = csvreader(StringIO(infile.read().decode()))
                        for idx, line in enumerate(rows, start=1):
                            if not line:
                                continue
                            desc = line[0].strip()
                            if not desc:
                                errors.append((idx, 'Prázdný popis'))
                                continue
                            if len(desc) > 255:
                                errors.append((idx, 'Příliš dlouhý popis'))
                                continue
                            # Explicit range checks instead of ``assert``:
                            # assertions are removed under ``python -O`` and
                            # would let invalid values through.
                            try:
                                number = int(line[1])
                            except (ValueError, IndexError):
                                number = None
                            if number is None or number <= 0:
                                errors.append((idx, 'Chybné běžné číslo'))
                                continue
                            try:
                                year = int(line[2])
                            except (ValueError, IndexError):
                                year = None
                            if year is None or year < 2008:
                                errors.append((idx, 'Chybný ročník'))
                                continue
                            detailed = line[3].strip()
                            if detailed == 'ano':
                                detailed = True
                            elif detailed == 'ne':
                                detailed = False
                            else:
                                errors.append((idx, 'Chybný údaj pro pole Vše'))
                                continue
                            try:
                                Insolvency.objects.update_or_create(
                                    uid_id=uid,
                                    desc=desc,
                                    defaults={
                                        'number': number,
                                        'year': year,
                                        'detailed': detailed}
                                )
                            except Exception:
                                errors.append((idx, 'Popisu "{}" odpovídá více než jedno řízení'.format(desc)))
                                continue
                            count += 1
                    LOGGER.info('User "{}" ({:d}) imported {:d} proceedings'.format(uname, uid, count), request)
                    return render(
                        request,
                        'sir_insbatchresult.xhtml',
                        {'app': APP,
                         'page_title': 'Import řízení ze souboru',
                         'count': count,
                         'errors': errors})
                except Exception:  # pragma: no cover
                    LOGGER.error('Error reading file', request)
                    err_message = 'Chyba při načtení souboru'

    return render(
        request,
        'sir_insbatchform.xhtml',
        {'app': APP,
         'page_title': 'Import řízení ze souboru',
         'err_message': err_message})
def partybatchform(request):
    """Render the party batch-import form and process an uploaded CSV file.

    On a POST with the ``load`` button the uploaded file is decoded and read
    as CSV.  The first column of each non-empty row holds a search string,
    optionally followed by ``:`` and a position abbreviation (``*`` when
    omitted).  Valid rows are stored with ``Party.objects.update_or_create``;
    invalid rows are collected as ``(row index, message)`` pairs and shown on
    the result page.  A POST with any other button is reported as an invalid
    form.

    Args:
        request: the incoming HTTP request.

    Returns:
        The rendered result page after a processed upload, otherwise the
        rendered form (with ``err_message`` set when something went wrong).
    """
    LOGGER.debug('Party import page accessed using method {}'.format(request.method), request)

    err_message = ''
    uid = request.user.id
    uname = request.user.username

    if request.method == 'POST':
        button = getbutton(request)

        if button == 'load':
            infile = request.FILES.get('load')
            if not infile:
                err_message = 'Nejprve zvolte soubor k načtení'
            else:
                errors = []
                try:
                    count = 0
                    with infile:
                        rows = csvreader(StringIO(infile.read().decode()))
                        for idx, line in enumerate(rows, start=1):
                            if not line:
                                continue

                            # "string:abbr" -> (string, abbr); without a colon
                            # the abbreviation defaults to '*'.
                            party, colon, party_opt = line[0].strip().partition(':')
                            if not colon:
                                party_opt = '*'

                            if not between(MIN_LENGTH, len(party), MAX_LENGTH):
                                errors.append((idx, 'Chybná délka řetězce'))
                                continue
                            if party_opt not in TEXT_OPTS_ABBR:
                                errors.append((idx, 'Chybná zkratka pro posici'))
                                continue

                            try:
                                Party.objects.update_or_create(
                                    uid_id=uid,
                                    party=party,
                                    defaults={'party_opt': TEXT_OPTS_AI[party_opt]},
                                )
                            except Exception:
                                errors.append(
                                    (idx, 'Řetězci "{}" odpovídá více než jeden účastník'.format(party)))
                                continue
                            count += 1

                    LOGGER.info('User "{}" ({:d}) imported {} party/ies'.format(uname, uid, count), request)
                    return render(
                        request,
                        'sur_partybatchresult.xhtml',
                        {'app': APP,
                         'page_title': 'Import účastníků řízení ze souboru',
                         'count': count,
                         'errors': errors})

                except Exception:  # pragma: no cover
                    LOGGER.error('Error reading file', request)
                    err_message = 'Chyba při načtení souboru'
        else:
            LOGGER.debug('Invalid form', request)
            err_message = INERR

    return render(
        request,
        'sur_partybatchform.xhtml',
        {'app': APP,
         'page_title': 'Import účastníků řízení ze souboru',
         'err_message': err_message,
         'min_length': MIN_LENGTH,
         'max_length': MAX_LENGTH})
# Columns align
aligns = {0: '<'}  # Right padding (left-aligned)
default_align = '>'  # Left padding (right-aligned)
head_aligns = {0: '^'}  # Center align
default_head_align = '>'

# Only command-line arguments that name an existing file count as input.
files = [arg for arg in argv[1:] if isfile(arg)]

if not files:
    print('No file specified')
    exit(1)

# Load every row of the first file and record, per column, the widest field
# seen so far.  The with-block closes the file once all rows are read.
rows = []
maxs = []
with open(files[0]) as csv_source:
    cfile = csvreader(csv_source, delimiter=';', quotechar='"')
    for row in cfile:
        rows.append(row[:])
        # Grow the width table when a row has more columns than any before it.
        if len(row) > len(maxs):
            maxs.extend([0] * (len(row) - len(maxs)))
        for i, field in enumerate(row):
            if maxs[i] < len(field):
                maxs[i] = len(field)

# Header format and show
def process_file(self, file, rewrite=False):
    """Run every data row of a CSV file through ``self.process_line``.

    The processor is re-initialised for ``self.csv``, a working copy of the
    settings is prepared, and the file is then read row by row with the
    dialect stored on ``self.csv``.  Blank rows are skipped.  A keyboard
    interrupt during a row opens a dialogue that lets the user retry the row,
    skip it, drop into the debugger or quit.

    Args:
        file: path of the CSV file to read.
        rewrite: forwarded to ``self.__init__``.
    """
    csv = self.csv
    self.__init__(csv, rewrite=rewrite)
    settings = csv.settings.copy()

    # convert settings["add"] to lambdas
    adds = []
    for it in settings["add"]:  # [("netname", 20, [lambda x, lambda x...]), ...]
        methods = self.csv.guesses.get_methods_from(it[0], it[2], it[3])
        adds.append((it[0], it[1], methods))
    del settings["add"]
    settings["addByMethod"] = adds

    if len(settings["chosen_cols"]) == len(csv.fields):
        del settings["chosen_cols"]

    if not settings["dialect"]:
        settings["dialect"] = csv.dialect

    settings["target_file"] = csv.target_file

    with open(file, "r") as sourceF:
        reader = csvreader(sourceF, dialect=csv.dialect)
        if csv.has_header:
            # skip header; the default keeps an empty file from raising StopIteration
            next(reader, None)
        for row in reader:
            if not row:  # skip blank
                continue
            csv.line_count += 1
            if csv.line_count == csv.line_sout:
                # Progress output: adapt how many lines pass between two reports
                # according to the time elapsed since the previous one.
                now = datetime.datetime.now()
                delta = (now - csv.time_last).total_seconds()
                csv.time_last = now
                if delta < 1 or delta > 2:
                    newVel = ceil(csv.velocity / delta) + 1
                    if abs(newVel - csv.velocity) > 100 and csv.velocity < newVel:
                        # smaller accelerating of velocity (decelerating is alright)
                        csv.velocity += 100
                    else:
                        csv.velocity = newVel
                csv.line_sout = csv.line_count + 1 + csv.velocity
                csv.informer.sout_info()
            try:
                self.process_line(csv, row, settings)
            except BdbQuit:  # not sure if working, may be deleted
                print("BdbQuit called")
                raise
            except KeyboardInterrupt:
                print("Keyboard interrupting")
                try:
                    print("{} line number, {} ip".format(csv.line_count, ip))
                except Exception:
                    # best-effort debug output only (e.g. `ip` may not be defined)
                    pass
                o = Dialogue.ask(
                    "Catched keyboard interrupt. Options: continue (default, do the line again), [s]kip the line, [d]ebug, [q]uit: ")
                if o == "d":
                    print(
                        "Maybe you should hit n multiple times because pdb takes you to the wrong scope.")  # I dont know why.
                    ipdb.set_trace()
                elif o == "s":
                    continue  # skip to the next line
                elif o == "q":
                    # Close the descriptors first: quit() raises SystemExit, so
                    # nothing placed after it would ever run.
                    self._close_descriptors()
                    quit()
                else:  # continue from last row
                    csv.line_count -= 1  # let's pretend we didn't just do this row before and give it a second chance
                    self.process_line(csv, row, settings)

    self._close_descriptors()

    if self.csv.is_split:
        attch = {at.path for at in self.csv.attachments}
        for f in self.files_created:
            if f not in attch and f != Config.INVALID_NAME:
                # set that a mail with this attachment have not yet been sent
                self.csv.attachments.append(Attachment(None, None, f))
        Attachment.refresh_attachment_stats(self.csv)
def __init__(self, reader, delim=";", quote='"'):
    """Wrap *reader* in a CSV parser and start with empty row and text buffers.

    Args:
        reader: iterable of text lines handed to the CSV parser.
        delim: field delimiter character.
        quote: quote character.
    """
    parser_options = {"delimiter": delim, "quotechar": quote}
    parsed_rows = csvreader(reader, **parser_options)
    self.reader, self.rows, self.buffer = parsed_rows, deque(), ""
template = argv[1] csv_files = argv[2:] print '-- Converting CSV file to templatized HTML --' print '-- Using template: %s' % (template) print '-- Operating on CSV files: %s' % (csv_files) counter = 0 with open(template, 'rb') as template: template = template.read() template = Template(template) for f in csv_files: with open(f, 'rb') as csvfile: csvdata = csvreader(csvfile) keys = None for row in csvdata: if not keys: keys = row; continue stats = OrderedDict(zip(keys,row)) laptop_name = '%s %s' % (stats['Manufacturer'], stats['Model #']) OS = row[keys.index('Installed OS')] username,password = stats['Username-Password'].split('-') del stats['Notes'] del stats['Username-Password'] with open(OUTFNAME_TEMPLATE % (counter), 'wb') as out:
) # Using the argument parser to do a lot of work: # * Prefer command line arguments # * Fall back on config file values # * Convert to appropriate types # * Check validity of arguments args = argman.parse_args() # Make sure integer arguments are in a reasonable range. args.dot_size = catcm.constrain_integer(args.dot_size, 2, 100) args.perimeter_resolution = catcm.constrain_integer(args.perimeter_resolution, 1, 120) # Open and process the data file with open(args.datafile_path, 'rt') as datafile: csvrows = csvreader(datafile) # Limit to certain cats, if requested if args.catids: csvrows = [csvrow for csvrow in csvrows if csvrow[int(catcm.cfg_data_column_catid)] in args.catids] # If no rows were retrieved, warn user that cat is not represented in the current data if not csvrows: sys.exit('No CSV data was found after checking cat ids. Cat ids were {}.'.format(','.join(args.catids))) # Create a new DataPool object to work with datapool = catst.STDataPool(args.dot_size, args.perimeter_resolution) # For every row, create a Fix object and add it to the DataPool for csvrow in csvrows: try:
def procbatchform(request):
    """Render the proceedings batch-import form and process an uploaded CSV file.

    On a POST with the ``load`` button the uploaded file is decoded and read
    as CSV.  Each non-empty row must have exactly three columns: a
    description, a court identifier and a file reference.  Rows that pass
    validation and do not match an existing record are stored with
    ``Proceedings.objects.update_or_create`` and refreshed via ``updateproc``;
    invalid rows are collected as ``(row index, message)`` pairs and shown on
    the result page.

    Args:
        request: the incoming HTTP request.

    Returns:
        The rendered result page after a processed upload, otherwise the
        rendered form (with ``err_message`` set when something went wrong).
    """
    LOGGER.debug('Proceedings import page accessed using method {}'.format(request.method), request)

    err_message = ''
    uid = request.user.id
    uname = request.user.username
    today = date.today()

    if request.method == 'POST':
        button = getbutton(request)

        if button == 'load':
            infile = request.FILES.get('load')
            if not infile:
                err_message = 'Nejprve zvolte soubor k načtení'
            else:
                errors = []
                try:
                    count = 0
                    with infile:
                        rows = csvreader(StringIO(infile.read().decode()))
                        for idx, line in enumerate(rows, start=1):
                            if not line:
                                continue
                            if len(line) != 3:
                                errors.append((idx, 'Chybný formát'))
                                continue

                            desc = line[0].strip()
                            if not desc:
                                errors.append((idx, 'Prázdný popis'))
                                continue
                            if len(desc) > 255:
                                errors.append((idx, 'Příliš dlouhý popis'))
                                continue

                            # The lookup itself is the check; it is no longer
                            # wrapped in an assert, which "python -O" would
                            # strip together with the query.
                            court = line[1]
                            try:
                                Court.objects.get(id=court)
                            except Exception:
                                errors.append((idx, 'Chybná zkratka soudu'))
                                continue

                            # Explicit range checks instead of assert, for the
                            # same reason.
                            try:
                                senate, register, number, year = decomposeref(line[2])
                                ref_ok = (
                                    senate >= 0
                                    and register in REGISTERS
                                    and number > 0
                                    and 1990 <= year <= today.year)
                            except Exception:
                                ref_ok = False
                            if not ref_ok:
                                errors.append((idx, 'Chybná spisová značka'))
                                continue

                            existing = Proceedings.objects.filter(
                                uid_id=uid,
                                desc=desc,
                                court=court,
                                senate=senate,
                                register=register,
                                number=number,
                                year=year)
                            if not existing.exists():
                                try:
                                    proc = Proceedings.objects.update_or_create(
                                        uid_id=uid,
                                        desc=desc,
                                        defaults={
                                            'court_id': court,
                                            'senate': senate,
                                            'register': register,
                                            'number': number,
                                            'year': year,
                                            'changed': None,
                                            'updated': None,
                                            'hash': '',
                                            'auxid': 0,
                                            'notify': False})[0]
                                    updateproc(proc)
                                    proc.save()
                                except Exception:
                                    errors.append(
                                        (idx, 'Popisu "{}" odpovídá více než jedno řízení'.format(desc)))
                                    continue
                            count += 1

                    LOGGER.info('User "{}" ({:d}) imported {:d} proceedings'.format(uname, uid, count), request)
                    return render(
                        request,
                        'szr_procbatchresult.xhtml',
                        {'app': APP,
                         'page_title': 'Import řízení ze souboru',
                         'count': count,
                         'errors': errors})

                except Exception:  # pragma: no cover
                    LOGGER.error('Error reading file', request)
                    err_message = 'Chyba při načtení souboru'

    return render(
        request,
        'szr_procbatchform.xhtml',
        {'app': APP,
         'page_title': 'Import řízení ze souboru',
         'err_message': err_message})
def import_data(mode='training'):
    """Load a delimited data file and return its contents with the header row.

    The path comes from the command line (``sys.argv[1]`` for training,
    ``sys.argv[2]`` for any other mode) and falls back to the module-level
    ``default_trn_path`` / ``default_tst_path`` when that argument is absent.
    The first row is read to record the column count in the global
    ``trn_cols`` or ``tst_cols`` and, when the matching ``heads_in_*_file``
    flag is set, to capture the column heads.  The whole file is then parsed
    with ``np.genfromtxt``, applying ``conv`` to every non-excluded column.

    Args:
        mode: ``'training'`` selects the training file; any other value
            selects the test file.

    Returns:
        Tuple ``(data, heads)``: the parsed array and the list of column heads
        (empty when the file has no header row).

    Raises:
        SystemExit: when the selected path does not exist.
    """
    global trn_cols, tst_cols

    training = mode == 'training'

    # get input file (features and labels); a non-training call with only one
    # command-line argument now falls back to the default test path instead of
    # leaving fname unbound
    if training:
        fname = sys.argv[1] if len(sys.argv) > 1 else default_trn_path
    else:
        fname = sys.argv[2] if len(sys.argv) > 2 else default_tst_path

    # Output uses single-argument print(...) so the text is identical to the
    # former print statements on Python 2 and the block also parses on Python 3.
    if not os.path.exists(fname):
        print("usage: %s [default_trn_path] [default_tst_path]"
              % os.path.split(sys.argv[0])[1])
        print("Valid file paths must be provided as an arg or global varables")
        sys.exit("invalid input")

    # get heads: only the first row is needed, and the file is closed right away
    start_row, heads = 0, []
    with open(fname, 'rb') as head_file:
        first_row = next(csvreader(head_file), None)
    if first_row is not None:
        # get no of cols in data
        if training:
            trn_cols = len(first_row)
            has_heads = heads_in_trn_file
        else:
            tst_cols = len(first_row)
            has_heads = heads_in_tst_file
        if has_heads:
            heads = first_row
            start_row += 1

    # build a dict to map each col to a conv func (if not excl)
    if mode not in ['test', 'predict']:
        n_cols, excluded = trn_cols, excl_trn_cols
    else:
        n_cols, excluded = tst_cols, excl_tst_cols
    cols = [i for i in range(n_cols) if i not in excluded]
    conv_dict = {c: conv for c in cols}
    if verbose:
        print('\nData import: %s | cols: %s \n' % (mode, cols))

    # import data
    # not excluding unneeded cols, import all, just without conversions
    # they are excluded later in feature_prep
    data = np.genfromtxt(fname, delimiter=delim, converters=conv_dict,
                         skip_header=start_row)
    if verbose:
        print('all heads:\n%s \n' % ', '.join(heads))
        print('shape of data: %s' % (np.shape(data),))
        print(data)
    return data, heads