def __init__(self, filename, arg_dropnan=True, text_series='text',
             processed_series='processed_text', length_series='length',
             group_series='group_name', cat_series='category_name'):
    """Load a CSV file and initialise the instance state.

    Args:
        filename: Path of the CSV file; it is read with UTF-8 encoding.
        arg_dropnan: When true, rows whose ``text_series`` value is missing
            are dropped before the data is stored on the instance.
        text_series: Name of the column holding the raw text.
        processed_series: Column name stored as ``self.processed_series``.
        length_series: Column name stored as ``self.length_series``.
        group_series: Column name stored as ``self.group_series``.
        cat_series: Column name stored as ``self.cat_series``.

    If ``text_series`` is not a column of the CSV, an error is printed and
    the method returns early without setting any attribute on the instance.
    """
    csv = ps.read_csv(filename, encoding='utf-8')

    # Keep the try body minimal: only the column lookup may legitimately
    # raise KeyError for a missing column.
    try:
        test_data = csv.dropna(subset=[text_series])[text_series]
    except KeyError:
        print("ERR: column '", text_series, "'does not exists.")
        print("YOUR CSV:", csv[:0])
        return None

    # Sample the middle non-null value *by position*.  After dropna() the
    # index labels have gaps, so a label lookup could raise KeyError and be
    # misreported as a missing column.
    if len(test_data) > 0:
        test_string = test_data.iloc[len(test_data) // 2]
        # A non-string sample (e.g. a numeric column) is not plain text either.
        if not isinstance(test_string, str) or test_string.isnumeric():
            print("WARN: your test data seems to NOT be plain text")

    self.file = filename
    self.dropnan = arg_dropnan
    self.text_series = text_series
    self.processed_series = processed_series
    self.length_series = length_series
    self.group_series = group_series
    self.cat_series = cat_series

    if self.dropnan:
        csv = csv.dropna(subset=[text_series])
    print("YOUR CSV:", csv[:0])

    # ``origin`` keeps the data as loaded; ``current`` is an independent copy.
    self.origin = csv
    self.current = csv.copy()
    self.bow = {}
    self.freqw = []
    self.stopwords = []
    self.row_pr_posts = ''
    self.all_text = ''
    self.all_pubs = ''
def updateFollowing(csv):
    """Add newly followed accounts to the stored CSV mapping.

    Fetches the accounts returned by ``api.friends()``, loads the mapping
    via ``readCSV(csv)``, and gives every screen name that is not yet a key
    an entry with value ``0``.  The merged mapping is passed to ``writeCSV``
    and the number of added entries is printed.
    """
    following = api.friends()
    stored = readCSV(csv)
    merged = stored.copy()

    added = 0
    for account in following:
        # Membership is tested against the loaded data, not the merged copy.
        if account.screen_name in stored.keys():
            continue
        merged[account.screen_name] = 0
        added += 1

    writeCSV(merged)
    print(f'{added} following updated')
def _register_new_layout(name_prompt):
    """Ask the user for a new layout, store it in ``layouts``, return its key."""
    layout_name = input(name_prompt).strip().lower()
    definition = input("Enter Layout Definition for {}: ".format(
        layout_name)).strip().lower()
    new_layouts = validLayout(layout_name, definition, prompt=True)
    key = sorted(new_layouts.keys())[0]
    layouts[key] = new_layouts[key]
    # Return the key the layout was actually stored under, so the caller
    # never looks up a name that is absent from ``layouts``.
    return key


def mainboi(infile, outdir, layoutPriority):
    '''The meat of this script. Converts a single file.

    Steps, as performed below:

    1. Produce ``<outdir>/<name>.mid`` from ``infile`` (via ``mscore`` when
       the ``musescore`` flag is set, otherwise a plain ``cp``).
    2. Dump that MIDI file to CSV text with ``midicsv``, record the program
       number of every track and set every volume controller (controller 7)
       to ``backgroundVolume``.  If the marker text event is found the file
       was already processed and the function returns.
    3. Pick the layout whose parts match the recorded programs, asking the
       user when the choice is ambiguous or no layout matches.
    4. For each part of the layout write ``<outdir>/<name>_<part>.mid`` with
       that part's track at ``foregroundVolume`` and its program swapped,
       and optionally render an mp3 when ``args.mp3`` is set.

    Args:
        infile: Path of the score/MIDI file to convert.
        outdir: Directory receiving all generated files.
        layoutPriority: Layout name preferred when several layouts match.

    Returns:
        None.
    '''
    # Step one: figure out what/where
    name = os.path.splitext(os.path.basename(infile))[0]
    score_midi = os.path.join(outdir, name + os.path.extsep + 'mid')
    printv("name: `{}`".format(name), "score_midi: `{}`".format(score_midi))
    if musescore:
        subprocess.run(["mscore", "-o", score_midi, infile],
                       stdout=subprocess.PIPE,
                       stderr=subprocess.PIPE)
    else:
        subprocess.run(["cp", infile, score_midi])

    # now we use midicsv
    # NOTE(review): decode('ascii') raises UnicodeDecodeError if midicsv ever
    # emits a non-ASCII byte (e.g. accented track names) -- confirm whether
    # that can happen for the scores this script handles.
    csv = subprocess.run(
        ["midicsv", score_midi],
        stdout=subprocess.PIPE).stdout.decode('ascii').split('\n')

    # now we do a bunch of string processing
    ###########################################################
    # first step is to build a set of instrumentation candidates
    patches = {}
    for i, line in enumerate(csv):
        lsp = line.split(', ')
        if len(lsp) == 5 and lsp[2] == "Program_c":
            # this line has a synth set
            patches[int(lsp[0])] = int(lsp[-1])
            printv(lsp)
        # neutralise volumes
        if len(lsp) > 5 and lsp[2] == "Control_c" and lsp[4] == '7':
            lsp[5] = str(backgroundVolume)
            printv(lsp)
        if len(lsp) == 4 and lsp[2] == "Text_t" and lsp[3] == markerText:
            # this file has already been processed!
            printv("File already processed!")
            return
        csv[i] = ', '.join(lsp)
    printv("Patches: ", patches)

    # A layout is a candidate when it has one part per recorded track and
    # the n-th part's expected program equals the program of track n.
    # ``patches.get`` treats a missing track number as a mismatch instead of
    # raising KeyError.
    candidates = []
    for layout in layouts:
        members = layouts[layout]
        if len(members) != len(patches):
            continue
        if all(parts[member[0]][0] == patches.get(track)
               for track, member in enumerate(members, start=1)):
            candidates.append(layout)
    printv("Candidates: ", candidates)

    if len(candidates) > 1:
        if layoutPriority in candidates:
            chosen_layout = layoutPriority
        else:
            print("Please select a layout for " + name +
                  " or else describe a new one.")
            optstr = ' '.join(
                "[{}]: {},".format(number, candidate)
                for number, candidate in enumerate(candidates, start=1))
            opt = input(optstr + ' [N]ew layout: ').upper().strip()
            if opt.startswith('N'):
                chosen_layout = _register_new_layout("Enter Layout Name: ")
            elif opt.isdecimal() and 1 <= int(opt) <= len(candidates):
                # that minus-one is very important
                chosen_layout = candidates[int(opt) - 1]
            else:
                # Anything else, including an out-of-range number, falls
                # back to the first option.
                print("Selecting option [1]")
                chosen_layout = candidates[0]
    elif len(candidates) == 1:
        chosen_layout = candidates[0]
    else:
        print("Error: no candidate layouts for {}.".format(infile))
        chosen_layout = _register_new_layout("Enter a new Layout Name: ")

    #########################################################################
    # We have a layout now, in `chosen_layout`
    printv(chosen_layout)

    # now for the real fun... iterating over everything
    marker_line = "0, 0, Text_t, " + markerText
    # The track number of a part is its 1-based position in the layout.
    # Using the position (not a search by part name) keeps parts that share
    # a name, such as two voices of one section, on their own tracks.
    for partid, p in enumerate(layouts[chosen_layout], start=1):
        partname = p[0]
        printv(partname, partid)
        partlabel = ''.join(p)
        filename = os.path.join(
            outdir, name + "_" + partlabel + os.path.extsep + 'mid')
        track = str(partid)

        # The marker lets a later run detect already-processed output.
        this_csv = [marker_line]
        for line in csv:
            lsp = line.split(', ')
            # foreground volumes
            if (len(lsp) == 6 and lsp[0] == track
                    and lsp[2] == "Control_c" and lsp[4] == '7'):
                lsp[5] = str(foregroundVolume)
                printv(lsp)
            # instrument swapsies
            if (len(lsp) == 5 and lsp[0] == track
                    and lsp[2] == "Program_c"
                    and lsp[4] == str(parts[partname][0])):
                lsp[4] = str(parts[partname][1])
                printv(lsp)
            this_csv.append(', '.join(lsp))

        subprocess.run(['csvmidi', '-', filename],
                       input='\n'.join(this_csv).encode())
        if args.mp3:
            mp3_name = filename[0:-3] + "mp3"
            subprocess.run(['mscore', '-o', mp3_name, filename],
                           stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE)
def perform_data_transformation(self):
    """Reshape per-date CSV files into per-trait CSV files.

    Every non-hidden file directly inside ``self.csv_date_path`` is read as
    a CSV; the part of the file name before the first ``.`` is used as its
    date label, and files are processed in sorted name order.  For each
    column of the first file other than ``'Row IDX'`` and ``'Column IDX'``
    (a "trait") two files are written below ``self.csv_path``:

    * ``trait_csvs/<trait>.csv`` -- the first two columns of the first file
      followed by one column per date holding that trait's values.
    * ``relative_trait_csvs/<trait>.csv`` -- the same table where each date
      column holds the difference to the previous date column and the first
      date column is set to ``0``.

    Trait columns are taken by position (trait ``t`` is column ``t + 2``),
    so the two index columns are assumed to come first in every file --
    NOTE(review): confirm against the files' producer.

    Raises:
        FileNotFoundError: If the directory is missing or holds no usable
            file.
    """
    # Only the top-level listing is needed; a missing directory makes
    # os.walk yield nothing, which is handled like an empty directory.
    _, _, files = next(os.walk(self.csv_date_path), (None, None, []))

    csvs = []
    dates = []
    for file in sorted(files):
        if file.startswith("."):
            continue
        csvs.append(pd.read_csv(os.path.join(self.csv_date_path, file)))
        dates.append(file.split(".")[0])
    if not csvs:
        raise FileNotFoundError(
            "no CSV files found in {!r}".format(self.csv_date_path))

    traits = [c for c in csvs[0].columns
              if c not in ('Row IDX', 'Column IDX')]
    date_base = csvs[0].iloc[:, 0:2]
    col_names = ['Row IDX', 'Column IDX'] + dates

    # One frame per trait: index columns plus that trait from every date.
    trait_csvs = []
    for t in range(len(traits)):
        t_csv = pd.concat(
            [date_base] + [frame.iloc[:, t + 2] for frame in csvs], axis=1)
        t_csv.columns = col_names
        trait_csvs.append(t_csv)

    # Never incremented; still printed so the console output is unchanged.
    count = 0
    print(count)
    print(len(trait_csvs))
    print(len(traits))

    trait_csv_path = os.path.join(self.csv_path, "trait_csvs")
    rel_trait_path = os.path.join(self.csv_path, "relative_trait_csvs")
    os.makedirs(trait_csv_path, exist_ok=True)
    os.makedirs(rel_trait_path, exist_ok=True)

    for trait, frame in zip(traits, trait_csvs):
        frame.to_csv(path_or_buf=os.path.join(trait_csv_path, trait + ".csv"),
                     index=False,
                     float_format='%.3f')

    rel_trait_csvs = []
    for frame in trait_csvs:
        csv_copy = frame.copy()
        # Walk from the last date column down to the second one so that each
        # subtraction still sees the unmodified previous column.  The bound
        # follows the actual number of date columns.
        for c in range(csv_copy.shape[1] - 1, 2, -1):
            csv_copy.iloc[:, c] = csv_copy.iloc[:, c] - csv_copy.iloc[:, c - 1]
        csv_copy.iloc[:, 2] = 0
        rel_trait_csvs.append(csv_copy)

    for trait, frame in zip(traits, rel_trait_csvs):
        frame.to_csv(path_or_buf=os.path.join(rel_trait_path, trait + ".csv"),
                     index=False,
                     float_format='%.3f')