def navigate(self):
    subj = self.subject
    mthd = self.method
    code = self.code
    fname = f'db/{subj}/{code}-{mthd}.csv'
    with open(fname, 'r') as fd:
        self.reader = csvReader(fd)
        res = self.parse_file()
    return res
def buildHHNotesRelations():
    melody_path = "./source_data/weapons/weapon_melody_combined.csv"
    weapons_path = "./source_data/weapons/weapon_base.csv"
    all_melodies = []
    with open(melody_path, 'r') as input:
        reader = csvReader(input)
        for idx, row in enumerate(reader):
            if idx == 0:
                continue
            all_melodies.append(row[0])
    weapon_melody_dict = {}
    with open(weapons_path, 'r') as input:
        reader = csvReader(input)
        for idx, row in enumerate(reader):
            if idx == 0 or row[2] != 'hunting-horn':
                continue
            weapon_name = row[1]
            weapon_name = weapon_name.replace('\'', '')
            print(weapon_name)
            weapon_notes = row[-3]
            #print (weapon_name, weapon_notes, all_melodies[0])
            weapon_melody = {}
            for c in weapon_notes:
                weapon_melody[c] = 1
            for melody in all_melodies:
                feasible = True
                for c in melody:
                    if c not in weapon_melody:
                        feasible = False
                        break
                if feasible:
                    if weapon_name not in weapon_melody_dict:
                        weapon_melody_dict[weapon_name] = [melody]
                    else:
                        weapon_melody_dict[weapon_name].append(melody)
    with open("weapon_all_melodies.json", 'w') as output:
        dump(weapon_melody_dict, output, separators=(',\n', ':'))
    idx = 0
    for weapon in weapon_melody_dict:
        #print (weapon, sorted(weapon_melody_dict[weapon], key=lambda x:len(x)))
        idx += 1
        if idx == 10:
            break
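# The feasibility test in buildHHNotesRelations amounts to a subset check:
# a melody is playable on a horn iff every note character of the melody
# appears in the weapon's note string. A minimal set-based equivalent
# (the note letters below are toy values, not the dataset's encoding):
weapon_notes = set("PBG")
for melody in ("PP", "PBW"):
    print(melody, set(melody) <= weapon_notes)  # PP -> True, PBW -> False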
def getUniqueNotes():
    path = "./source_data/weapons/weapon_melody_notes.csv"
    all_letters = {}
    with open(path, 'r') as input:
        reader = csvReader(input)
        for idx, row in enumerate(reader):
            if idx == 0:
                continue
            notes = row[1]
            for c in notes:
                if c not in all_letters:
                    all_letters[c] = 1
    print(all_letters.keys())
def listCalls(csvPath: str) -> list:
    calls = []
    with open(csvPath) as dataFile:
        dataReader = csvReader(dataFile, dialect='excel')
        for i, splitRow in enumerate(dataReader):
            if i == 0:
                Call.setSchema(splitRow)
            else:
                calls.append(Call(splitRow))
    return calls
def readCsvFile(theFile, comment='#'):
    """
    Reads the global config file and returns a list with the IPs, ports,
    and config file names.

    GlobalConfig.csv has the format:
        IP Port DeviceConfigFileName
    """
    # TODO: Implement this method in a util.py class
    clients = []
    with open(theFile) as csvFile:
        csvBuffer = csvReader(csvFile, delimiter=',')
        for line in csvBuffer:
            if len(line) > 0:
                server = line[0]
                # skip rows whose first field contains the comment marker
                if comment not in server:
                    clients.append(line)
    return clients
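# A minimal usage sketch for readCsvFile, assuming a GlobalConfig.csv laid
# out as the docstring describes; the sample rows are hypothetical and are
# written to a temp file since the function takes a path:
if __name__ == '__main__':
    from tempfile import NamedTemporaryFile

    with NamedTemporaryFile('w', suffix='.csv', delete=False) as tmp:
        tmp.write("# IP,Port,DeviceConfigFileName\n")  # skipped: comment marker
        tmp.write("10.0.0.1,5000,deviceA.cfg\n")
        tmp.write("10.0.0.2,5001,deviceB.cfg\n")
        samplePath = tmp.name

    for ip, port, configFile in readCsvFile(samplePath):
        print(ip, port, configFile)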
def buildWeaponImageMap():
    path = "./source_data/weapons/weapon_base.csv"
    output_path = "./source_data/weapons/weapon_image_map.json"
    output_weapons = []
    with open(path, 'r') as input:
        reader = csvReader(input)
        for idx, row in enumerate(reader):
            if idx == 0:
                continue
            if idx >= 1253:
                break
            output_weapons.append(row[1])
    with open(output_path, 'w') as output:
        # join entries with commas so the last one has no trailing comma,
        # which would make the output invalid JSON
        entries = []
        for weapon in output_weapons:
            weapon = weapon.replace("\"", '').replace("\'", '')
            entries.append("\"%s\":\"\"" % weapon)
        output.write("{\n")
        output.write(",\n".join(entries))
        output.write("\n}")
def importFromCSV(state: str, targetPath: str) -> CompaniesUnderState:
    '''
    The CSV dataset to be processed is malformed: we can't just split on
    `,` ( comma ) as the field separator, because commas appear in
    unexpected places. So we handle that using Python's own `csv` module,
    with `excel` as the dialect.
    '''
    companiesUnderStateObject = None  # this is what's to be returned
    try:
        # reads CSV file content in split-row form, which is processed
        # to create instances of the Company class
        with open(targetPath, mode='r', encoding='ISO-8859-1') as fd:
            companiesUnderStateObject = CompaniesUnderState(
                state,
                (Company(*i[:-2]) for i in csvReader(fd.readlines()[1:])))
    except Exception:
        companiesUnderStateObject = None
    finally:
        return companiesUnderStateObject
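# A self-contained illustration of why the `csv` module is needed above:
# a plain str.split(',') breaks quoted fields that contain commas, while
# csvReader parses them correctly (the sample row is hypothetical):
from csv import reader as csvReader
from io import StringIO

sampleRow = '"Acme, Inc.",Delhi,110001\n'
print(sampleRow.split(','))                  # ['"Acme', ' Inc."', 'Delhi', '110001\n']
print(next(csvReader(StringIO(sampleRow))))  # ['Acme, Inc.', 'Delhi', '110001']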
def generateSharpness():
    data_path = "./source_data/weapons/weapon_sharpness.csv"
    output_dir = "./source_data/weapons/sharpness_imgs/"
    width = 200
    height = 15
    with open(data_path, 'r') as input:
        reader = csvReader(input)
        for idx, row in enumerate(reader):
            if idx == 0:
                continue
            weapon_name = row[0].replace('\"', '').replace('\'', '')
            #if weapon_name != "Anguish": continue
            red, orange, yellow, green, blue, white = [int(e) / 2 for e in row[2:8]]
            img = Image.new("RGB", (width, height), (0, 0, 0))
            draw = ImageDraw.Draw(img)
            # draw the sharpness segments left to right
            tmp = 0
            for value, color in [(red, 'red'), (orange, 'orange'),
                                 (yellow, 'yellow'), (green, 'green'),
                                 (blue, 'blue'), (white, 'white')]:
                if value != 0:
                    draw.rectangle([(tmp, 0), (tmp + value, height)], fill=color)
                    tmp += value
            img_outlined = ImageOps.expand(img, 2, 0)
            filename = '_'.join([e.lower() for e in weapon_name.split()]) + ".png"
            output_path = os.path.join(output_dir, filename)
            img_outlined.save(output_path)
def idna_characters():  # pragma: no cover
    # type: () -> str
    """
    Returns a string containing IDNA characters.
    """
    global _idnaCharacters

    if _idnaCharacters is None:
        result = []

        # Data source "IDNA Derived Properties":
        # https://www.iana.org/assignments/idna-tables-6.3.0/
        # idna-tables-6.3.0.xhtml#idna-tables-properties
        dataFileName = join(dirname(__file__), "idna-tables-properties.csv")
        with open(dataFileName) as dataFile:
            reader = csvReader(dataFile, delimiter=",")
            next(reader)  # Skip header row
            for row in reader:
                codes, prop, description = row

                if prop != "PVALID":
                    # CONTEXTO or CONTEXTJ are also allowed, but they come
                    # with rules, so we're punting on those here.
                    # See: https://tools.ietf.org/html/rfc5892
                    continue

                startEnd = codes.split("-", 1)
                if len(startEnd) == 1:
                    # No end of range given; use start
                    startEnd.append(startEnd[0])
                start, end = (int(i, 16) for i in startEnd)

                for i in range(start, end + 1):
                    if i > maxunicode:
                        break
                    result.append(unichr(i))

        _idnaCharacters = u"".join(result)

    return _idnaCharacters
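# A small self-contained sketch of the range-parsing step above: rows in
# the IANA table give either a single code point ("0061") or a range
# ("0061-007A"), both in hex (sample values only, not the full table;
# chr() here assumes Python 3, where the original's unichr is chr):
for codes in ("0061", "0061-007A"):
    startEnd = codes.split("-", 1)
    if len(startEnd) == 1:
        startEnd.append(startEnd[0])
    start, end = (int(i, 16) for i in startEnd)
    print(codes, "->", chr(start), "..", chr(end))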
def load_csv(csv_file_name):
    header = []
    file_dict = []
    with open(csv_file_name, newline="") as csvfile:
        is_header = True
        csv_reader = csvReader(csvfile)
        for row in csv_reader:
            if row is not None and len(row) > 0:
                if is_header:
                    # the first non-empty row holds the column names; don't
                    # append it to file_dict (the original appended an empty
                    # dict for the header row)
                    for element in row:
                        header.append(" ".join(element.split()))
                    is_header = False
                    continue
                input_row = OrderedDict()
                for index, element in enumerate(row):
                    element = " ".join(element.split())
                    input_row[header[index]] = sanitize_value(element)
                file_dict.append(input_row)
    return file_dict
def buildZipcodes(calls: list) -> dict:
    zipcodes = {'all': CallSet()}
    keys = []
    for call in calls:
        zipcodes['all'].add(call)
        if call.zipcode not in zipcodes:
            zipcodes[call.zipcode] = CallSet()
            keys.append(call.zipcode)
        zipcodes[call.zipcode].add(call)

    # adds population data to the CallSets
    keys.sort()
    currentKeyIndex = 0
    totalPopulation = 0
    with open(POPULATIONDATA_PATH) as dataFile:
        dataReader = csvReader(dataFile, dialect='excel')
        for i, splitRow in enumerate(dataReader):
            if i == 0:
                continue
            if currentKeyIndex >= len(keys):
                break
            if int(splitRow[0]) == keys[currentKeyIndex]:
                zipcodes[keys[currentKeyIndex]].setPopulation(int(splitRow[1]))
                totalPopulation += int(splitRow[1])
                currentKeyIndex += 1
    zipcodes['all'].setPopulation(totalPopulation)
    return zipcodes
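# A note on the matching loop in buildZipcodes: it is effectively a
# sorted-merge join, and only works because `keys` is sorted and the
# population CSV is assumed to list ZIP codes in ascending order too.
# A tiny runnable illustration with made-up numbers:
zipKeys = [10001, 10003]                                  # sorted call ZIPs
censusRows = [(10001, 500), (10002, 300), (10003, 700)]   # sorted census data
i, matched = 0, {}
for zipCode, population in censusRows:
    if i >= len(zipKeys):
        break
    if zipCode == zipKeys[i]:
        matched[zipCode] = population
        i += 1
print(matched)  # {10001: 500, 10003: 700}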
def addCsv(self, csvFilePath: str):
    """ add the schema of a csv file and its data to the data set """
    with open(csvFilePath) as dataFile:
        rows = iter(
            splitRow for splitRow in csvReader(dataFile, dialect='excel')
        )
        self._schema[csvFilePath] = {
            attributeName: index
            for index, attributeName in enumerate(next(rows))
        }
        self.addDataSet(
            iter(
                {attrName: splitRow[index]
                 for attrName, index in self._schema[csvFilePath].items()}
                for splitRow in rows
            )
        )
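# A self-contained sketch of the schema-mapping idea used in addCsv: the
# header row becomes a name -> column-index dict, and each data row then
# becomes a name -> value dict (the sample CSV text is hypothetical):
from csv import reader as csvReader
from io import StringIO

rows = csvReader(StringIO("id,name\n1,alpha\n2,beta\n"), dialect='excel')
schema = {attributeName: index for index, attributeName in enumerate(next(rows))}
print(schema)  # {'id': 0, 'name': 1}
for splitRow in rows:
    print({attrName: splitRow[index] for attrName, index in schema.items()})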
from csv import reader as csvReader

with open('skill_base.csv', 'r') as input:
    reader = csvReader(input)
    colors = {}
    for idx, row in enumerate(reader):
        if idx == 0:
            continue
        color = row[1]
        colors[color] = 1
print(colors)
##
#df.columns=['Timestamp','Ask price','Ask volume','Bid price','Bid volume']
##    df['spread'] = df['Ask price'] - df['Bid price']
##
##
##    if os.path.isfile(file_out):
##        df.to_csv(file_out, mode='a', header=False, float_format='%.5f')
##    else:
##        df.to_csv(file_out, mode='w', header=True, float_format='%.5f')

start_time = time.time()
print(start_time)

db = pymongo.MongoClient('192.168.1.22').FXdata
db.eurusd_1m.delete_many({})

#reader = DictReader(open('EURUSD_1m_est_up.csv', 'r'))
reader = csvReader(open('EURUSD_1m_est_up.csv', 'r'))
header = [
    'Ask volume', 'Ask price', 'Bid price', 'Bid volume', 'Timestamp', 'spread'
]

# csvReader yields plain lists (unlike the DictReader commented out above),
# so indexing by field name requires a name -> column-index map built from
# the file's own header row (this assumes the CSV has one)
fileHeader = next(reader)
fieldIndex = {field: fileHeader.index(field) for field in header}

bulk = db.eurusd_1m.initialize_ordered_bulk_op()
for each in reader:
    row = {}
    for field in header:
        row[field] = each[fieldIndex[field]]
    bulk.insert(row)
bulk.execute()

print(db.eurusd_1m.count())
print("--- %s Add seconds ---" % (time.time() - start_time))
################

print("Topic: " + KafkaConstants.TOPIC)

# connect to kafka broker
consumer = KafkaConsumer(KafkaConstants.TOPIC,
                         bootstrap_servers=(KafkaConstants.BROKER_IP + ":9092"),
                         group_id=KafkaConstants.GROUP_ID,
                         auto_offset_reset="earliest",
                         consumer_timeout_ms=KafkaConstants.CONSUMER_TIMEOUT)

# stream movie rows
for msg in consumer:
    holder = BytesIO(msg.value)  # fake file
    rows = list(csvReader(holder))  # csv to list
    movieCountRec += len(rows)  # add received movies to counter
    curDate = rows[0][14]  # get date of current movies

    # cannot convert dates before 1900 into dataframe (year out of range)
    if curDate[0:2] == "18":
        continue

    movies += rows
    if len(movies) < KafkaConstants.CHUNK_SIZE:
        continue
    saveMovies()

# save last incomplete chunk
if len(movies) > 0:
    saveMovies()  # finish
def importFromCSV(targetPath: str) -> PostOfficeGraph:
    '''
    We just update the list of records which we have for a certain
    `PostOffice category` with the record passed as the second argument.

    Returns a Dict[str, List[List[str]]]
    '''
    def __updateRecordHolderDict__(
            holder: Dict[str, List[List[str]]],
            record: List[str]) -> Dict[str, List[List[str]]]:
        holder.update({record[2]: [record] + holder.get(record[2], [])})
        return holder

    '''
    Given an instance of PostOffice holding details about a certain `H.O`,
    we try to find the PostOffice object which is an `S.O` of the given
    name ( passed as the second argument )
    '''
    def __findSO__(currentHO: PostOffice, SOName: str) -> PostOffice:
        if not SOName:
            return currentHO
        pointer = None
        for i in currentHO.children:
            if i.officeName == SOName:
                pointer = i
                break
        return pointer

    '''
    Given the whole PostOfficeGraph, which is still under construction,
    we're asked to find an instance of PostOffice which is an `H.O`,
    if and only if the `SOName` argument is `None`.

    But there may be a situation where we have to find an `S.O` using the
    `SOName` argument, in which case we simply call the closure written
    just above this one with the requested `SOName` and the `H.O`
    ( PostOffice object ) we found.
    '''
    def __findHO__(graph: PostOfficeGraph,
                   HOName: str,
                   SOName: str = None) -> PostOffice:
        pointer = None
        for i in graph.headPostOffices:
            if i.officeName == HOName:
                pointer = __findSO__(i, SOName)
                break
        return pointer

    '''
    We first find the `H.O` for this `S.O`, create a new instance of
    PostOffice ( of type `S.O` ), and append that instance to the
    children list of the `H.O`
    '''
    def __linkSOWithHO__(graph: PostOfficeGraph,
                         currentSO: List[str]) -> PostOfficeGraph:
        __findHO__(graph, currentSO[12]).children.append(
            PostOffice(*currentSO[:10], []))
        return graph

    '''
    First finds the target `S.O`, then a newly created instance of
    PostOffice ( of type `B.O` ) is linked up with this `S.O`
    '''
    def __linkBOWithSO__(graph: PostOfficeGraph,
                         currentBO: List[str]) -> PostOfficeGraph:
        __findHO__(graph, currentBO[12], SOName=currentBO[11]).children.append(
            PostOffice(*currentBO[:10], None))
        return graph

    '''
    Finds the target `H.O` where this `special B.O` reports, and links
    them up
    '''
    def __linkSpecialBOWithHO__(
            graph: PostOfficeGraph,
            currentSpecialBO: List[str]) -> PostOfficeGraph:
        __findHO__(graph, currentSpecialBO[12]).children.append(
            PostOffice(*currentSpecialBO[:10], None))
        return graph

    graph = None
    try:
        poList = []
        with open(targetPath, mode='r', encoding='ISO-8859-1') as fd:
            poList = csvReader(fd.readlines()[1:])
        holder = reduce(
            lambda acc, cur: __updateRecordHolderDict__(acc, cur), poList, {})
        graph = reduce(
            lambda acc, cur: __linkSpecialBOWithHO__(acc, cur),
            holder['B.O directly a/w Head Office'],
            reduce(
                lambda acc, cur: __linkBOWithSO__(acc, cur),
                holder['B.O'],
                reduce(
                    lambda acc, cur: __linkSOWithHO__(acc, cur),
                    holder['S.O'],
                    PostOfficeGraph([
                        PostOffice(*i[:10], []) for i in holder['H.O']
                    ]))))
    except Exception:
        graph = None
    finally:
        return graph
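# A tiny runnable analogue of the nested-reduce construction above: the
# innermost reduce runs first, so parents always exist before a child is
# attached, mirroring H.O -> S.O -> B.O build order (toy data only):
from functools import reduce

tree = {'HO1': []}            # innermost step: head offices exist first
def link(acc, child):         # each reduce pass attaches one layer of children
    parent, name = child
    acc[parent].append(name)
    acc.setdefault(name, [])
    return acc

tree = reduce(link, [('SO1', 'BO1')],                 # outer pass: B.O -> S.O
              reduce(link, [('HO1', 'SO1')], tree))   # inner pass: S.O -> H.O
print(tree)  # {'HO1': ['SO1'], 'SO1': ['BO1'], 'BO1': []}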
def insertionsFromCSV(name, table_info):
    path = table_info["path"]
    assert pathExists(path), "csv file [%s] missing" % (path)
    columns = table_info["columns"]
    if "artificialId" in table_info and table_info["artificialId"]:
        artificialId = True
    else:
        artificialId = False

    with open(path, 'r') as input:
        reader = csvReader(input)
        # number of rows in the csv
        rows = 0
        # lines of insertion
        lines = []
        col_header_map = {}
        for idx, row in enumerate(reader):
            print(idx, row)
            if idx == 0:
                for i, header in enumerate(row):
                    col_header_map[header] = i
                print(col_header_map)
                continue
            rows += 1
            if "customColumnsMap" in table_info:
                args = []
                if artificialId:
                    line = "(" + len(table_info["customColumnsMap"]) * "%s,"
                    line += "%s)"
                    args.append(idx)
                else:
                    line = "(" + (len(table_info["customColumnsMap"]) - 1) * "%s,"
                    line += "%s)"
                for key in table_info["customColumnsMap"]:
                    col = table_info["customColumnsMap"][key]
                    col_num = col_header_map[col]
                    item = row[col_num]
                    if len(item) == 0:
                        args.append("NULL")
                    elif item.isdigit():
                        args.append(item)
                    elif item == "FALSE" or item == "TRUE":
                        args.append('1') if item == "TRUE" else args.append('0')
                    else:
                        args.append("'%s'" % (item.replace('\'', '')))
                line = line % tuple(args)
                lines.append(line)
            else:
                args = []
                if artificialId:
                    line = "(" + len(row) * "%s,"
                    line += "%s)"
                    args.append(idx)
                else:
                    line = "(" + (len(row) - 1) * "%s,"
                    line += "%s)"
                for item in row:
                    if len(item) == 0:
                        args.append("NULL")
                    elif item.isdigit():
                        args.append(item)
                    elif item == "FALSE" or item == "TRUE":
                        args.append('1') if item == "TRUE" else args.append('0')
                    else:
                        args.append("'%s'" % (item.replace('\'', '')))
                line = line % tuple(args)
                lines.append(line)

    col_names = list(columns.keys())
    insertion_header = "insert into `%s` (" + (len(col_names) - 1) * "`%s`," + "`%s`) values "
    insertion_header = insertion_header % tuple([name] + col_names)
    insertion_body = ','.join(["%s"] * rows) + ';'
    insertion_body = insertion_body % tuple(lines)
    insertion = insertion_header + insertion_body
    return insertion
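# A hedged sketch of the table_info shape insertionsFromCSV appears to
# expect, inferred from the lookups above; the path, column names, and
# type strings here are hypothetical:
sample_table_info = {
    "path": "./source_data/example.csv",       # CSV file to read
    "columns": {"id": "int", "name": "text"},  # target columns (keys are used)
    "artificialId": True,                      # prepend the row index as an id
    "customColumnsMap": {                      # target column -> CSV header
        "name": "Name",
    },
}
# insertion = insertionsFromCSV("example_table", sample_table_info)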
        return False

def mapper(n):
    return dict([[int(n), 1]])

def reducer(i, j):
    for k in j:
        i[k] = i.get(k, 0) + j.get(k, 0)  # .get so we default to 0 if not existent
    return i

with open('P.csv', 'r') as csvFile:
    reader = csvReader(csvFile, delimiter=';')
    for row in reader:
        result = reduce(reducer, map(mapper, filter(filterer, row)))
        for k, v in sorted(result.items()):
            print("{0} : {1}".format(k, v))

#%% [markdown]
# ## c)
# Create a histogram of the number frequencies such that the numbers on the
# x-axis are ordered by size!

#%%
from csv import reader as csvReader
from matplotlib import pyplot as plt

def filterer(string):
    if tag in nounTags:
        return 'n'
    elif tag in verbTags:
        return 'v'
    elif tag in adjTags:
        return 'a'
    elif tag in advTags:
        return 'r'
    else:
        return None

resultsFile = open('corpusData.csv', 'r')
# files that we have already looked at
skipOver = [row[0] for row in csvReader(resultsFile, delimiter=',')]
# some screwy stuff was happening
resultsFile.close()
resultsFile = open('corpusData.csv', 'a+')

for f in listdir('corpus/'):
    if f[-4:] == ".txt" and f not in skipOver:
        fileName = f
        F = open('corpus/' + f)
        text = F.read()
        F.close()
        alphanum = letters + octdigits
import KafkaConstants
from time import sleep
from csv import reader as csvReader, writer as csvWriter
from io import BytesIO
from itertools import groupby
from collections import deque
from kafka import KafkaProducer
from datetime import datetime as date, timedelta

print("Topic: " + KafkaConstants.TOPIC)

creditRows = list(csvReader(open("credits.csv")))  # get credit csv rows as list
creditRows.pop(0)  # remove column names row
credits = {credit[2]: credit[:2]
           for credit in creditRows}  # convert to dictionary with id as key

movies = list(csvReader(open("movies_metadata.csv")))  # get movie csv rows as list
cols = len(movies[0])  # get column count
movies.pop(0)  # remove column names row
print("Movies: " + str(len(movies)))

# remove invalid rows (not enough values, no date)
movies = filter(lambda ele: len(ele) == cols and ele[14], movies)
# movies = filter(lambda ele: ele[14] == "1898-01-01" or ele[14] == "1874-12-09", movies)  # multiple movies at the same day + only one movie (for debugging)
movies = sorted(movies, key=lambda line: line[14])  # sort by date
print("Movies: " + str(len(movies)) + " (after filter)")

producer = KafkaProducer(
    bootstrap_servers=(KafkaConstants.BROKER_IP +
def rewriteArmorBase():
    path = "./source_data/armors/armorset_base.csv"
    out_path = "./source_data/armors/armorset_modified.csv"
    set_dict = {}
    with open(path, 'r') as input:
        reader = csvReader(input)
        header = None
        for idx, row in enumerate(reader):
            if idx == 0:
                header = [row[0]] + row[2:]
                continue
            if row[1] == "LR":
                continue
            set_name = row[0]
            words = set_name.split()
            #print (armor_name, " ", [[ord(l) for l in e] for e in words])
            name = []
            for w in words:
                # skip stray single CJK glyphs left in the set name
                if len(w) == 1 and ord(w[0]) > 20000:
                    continue
                name.append(w)
            name = '_'.join(name)
            info = row[2:]
            if name not in set_dict:
                set_dict[name] = [(1, info)]
            else:
                curr = set_dict[name][-1][0]
                set_dict[name].append((curr + 1, info))

    for key in set_dict:
        print(key, set_dict[key])

    with open(out_path, 'w', newline='') as out_file:
        writer = csvWriter(out_file)
        writer.writerow(header)
        suffix = {1: "alpha", 2: "beta", 3: "gamma"}
        for key in set_dict:
            row = None
            if len(set_dict[key]) == 1:
                set_name = key
                info = set_dict[key][0][1]
                for idx, part in enumerate(info):
                    if idx == 0 or idx == 6:
                        continue
                    words = part.split()
                    actual_name = []
                    for w in words:
                        if len(w) == 1 and ord(w[0]) > 20000:
                            continue
                        actual_name.append(w)
                    actual_name = '_'.join(actual_name)
                    info[idx] = actual_name
                row = [set_name] + info
                writer.writerow(row)
            else:
                for info in set_dict[key]:
                    suff = suffix[info[0]]
                    set_name = '_'.join([key, suff])
                    for idx, part in enumerate(info[1]):
                        if idx == 0 or idx == 6 or part == "":
                            continue
                        words = part.split()
                        actual_name = []
                        for w in words:
                            if len(w) == 1 and ord(w[0]) > 20000:
                                continue
                            actual_name.append(w)
                        actual_name = '_'.join(actual_name + [suff])
                        info[1][idx] = actual_name
                    row = [set_name] + info[1]
                    writer.writerow(row)
    print("done")