Exemple #1
0
    def __init__(self, filename, arg_dropnan=True, text_series='text',
                 processed_series='processed_text', length_series='length',
                 group_series='group_name', cat_series='category_name'):
        """Load a CSV file and initialise the instance state.

        Args:
            filename: Path of the CSV file; it is read as UTF-8.
            arg_dropnan: When true, rows whose ``text_series`` value is
                missing are dropped before the data is stored.
            text_series: Name of the column holding the text.
            processed_series: Column name stored on the instance.
            length_series: Column name stored on the instance.
            group_series: Column name stored on the instance.
            cat_series: Column name stored on the instance.

        If ``text_series`` is not a column of the CSV, an error is printed
        and the constructor returns early without setting any attribute.
        """
        csv = ps.read_csv(filename, encoding='utf-8')

        try:
            # Only the column lookup is guarded, so an unrelated KeyError
            # cannot be misreported as a missing column.
            test_data = csv.dropna(subset=[text_series])[text_series]
        except KeyError:
            print("ERR: column '", text_series, "'does not exists.")
            print("YOUR CSV:", csv[:0])
            return None

        # Sample the middle non-missing value by position: dropna() leaves
        # gaps in the index labels, so a label-based lookup could fail.
        if not test_data.empty:
            test_string = test_data.iloc[len(test_data) // 2]
            # str() lets non-string cells (e.g. integers) be checked too.
            if str(test_string).isnumeric():
                print("WARN: your test data seems to NOT be plain text")

        self.file = filename
        self.dropnan = arg_dropnan
        self.text_series = text_series
        self.processed_series = processed_series
        self.length_series = length_series
        self.group_series = group_series
        self.cat_series = cat_series
        if self.dropnan:
            csv = csv.dropna(subset=[text_series])
        # Slicing zero rows prints just the column header of the frame.
        print("YOUR CSV:", csv[:0])
        self.origin = csv
        self.current = csv.copy()
        self.bow = {}
        self.freqw = []
        self.stopwords = []
        self.row_pr_posts = ''
        self.all_text = ''
        self.all_pubs = ''
def updateFollowing(csv):
    """Add every not-yet-recorded friend to the stored data with value 0.

    Friends are fetched through ``api.friends()``, the existing records are
    loaded with ``readCSV(csv)``, and the merged result is handed to
    ``writeCSV``. The number of newly added screen names is printed.
    """
    accounts = api.friends()
    known = readCSV(csv)
    merged = known.copy()
    known_names = known.keys()

    added = 0
    for account in accounts:
        if account.screen_name in known_names:
            continue
        merged[account.screen_name] = 0
        added += 1

    writeCSV(merged)
    print(f'{added} following updated')
Exemple #3
0
def _prompt_new_layout(name_prompt):
    '''Ask the user for a new layout, register it in `layouts`, return its key.'''
    layout_name = input(name_prompt).strip().lower()
    potential_def = input("Enter Layout Definition for {}: ".format(
        layout_name)).strip().lower()
    new_layout = validLayout(layout_name, potential_def, prompt=True)
    key = sorted(new_layout.keys())[0]
    layouts[key] = new_layout[key]
    return key


def mainboi(infile, outdir, layoutPriority):
    '''The meat of this script. Converts a single file.

    The input is turned into a MIDI file in `outdir` (through `mscore` when
    the module-level `musescore` flag is set, otherwise by copying it), dumped
    to text with `midicsv`, and then one MIDI file per part of the chosen
    layout is written with `csvmidi`. In each of those files the part's own
    track gets `foregroundVolume` while every volume was first reset to
    `backgroundVolume`, and the part's patch is swapped for its alternate.

    infile         -- path of the score to convert
    outdir         -- directory receiving all generated files
    layoutPriority -- layout name preferred when several layouts match

    Returns None. Returns early, writing nothing further, when the marker
    text record shows the file was already processed.
    '''

    # Step one: figure out what/where

    name = os.path.splitext(os.path.basename(infile))[0]
    score_midi = os.path.join(outdir, name + os.path.extsep + 'mid')

    printv("name: `{}`".format(name), "score_midi: `{}`".format(score_midi))

    if musescore:
        subprocess.run(["mscore", "-o", score_midi, infile],
                       stdout=subprocess.PIPE,
                       stderr=subprocess.PIPE)
    else:
        subprocess.run(["cp", infile, score_midi])

    # now we use midicsv

    csv = subprocess.run(
        ["midicsv", score_midi],
        stdout=subprocess.PIPE).stdout.decode('ascii').split('\n')

    ###########################################################
    # first step is to build a set of instrumentation candidates

    patches = {}  # track number -> patch set on that track

    for i, line in enumerate(csv):
        lsp = line.split(', ')
        if len(lsp) == 5 and lsp[2] == "Program_c":
            # this line has a synth set
            patches[int(lsp[0])] = int(lsp[-1])
            printv(lsp)

        # neutralise volumes (controller 7)
        if len(lsp) > 5 and lsp[2] == "Control_c" and lsp[4] == '7':
            lsp[5] = str(backgroundVolume)
            printv(lsp)

        if len(lsp) == 4 and lsp[2] == "Text_t" and lsp[3] == markerText:
            # this file has already been processed!
            printv("File already processed!")
            return

        csv[i] = ', '.join(lsp)

    printv("Patches: ", patches)

    # A layout is a candidate when it has one part per patched track and the
    # i-th part's original patch equals the patch found on track i.
    candidates = []
    for layout, layout_parts in layouts.items():
        if len(layout_parts) != len(patches):
            continue
        # .get(): a track without a Program_c record simply does not match
        # instead of aborting the conversion with a KeyError.
        if all(parts[part[0]][0] == patches.get(track)
               for track, part in enumerate(layout_parts, start=1)):
            candidates.append(layout)

    printv("Candidates: ", candidates)

    if len(candidates) > 1:
        if layoutPriority in candidates:
            chosen_layout = layoutPriority
        else:
            print("Please select a layout for " + name +
                  " or else describe a new one.")
            optstr = ' '.join(
                "[{}]: {},".format(number, candidate)
                for number, candidate in enumerate(candidates, start=1))
            opt = input(optstr + ' [N]ew layout: ').upper().strip()

            if opt.startswith('N'):
                chosen_layout = _prompt_new_layout("Enter Layout Name: ")
            else:
                # Menu numbers are 1-based; anything that is not a listed
                # number falls back to the first candidate.
                choice = int(opt) if opt.isdecimal() else 0
                if 1 <= choice <= len(candidates):
                    chosen_layout = candidates[choice - 1]
                else:
                    print("Selecting option [1]")
                    chosen_layout = candidates[0]

    elif len(candidates) == 1:
        chosen_layout = candidates[0]
    else:
        print("Error: no candidate layouts for {}.".format(infile))
        # Use the key the layout was actually stored under, exactly like the
        # "[N]ew layout" branch above does.
        chosen_layout = _prompt_new_layout("Enter a new Layout Name: ")

    #########################################################################
    # We have a layout now, in `chosen_layout`

    printv(chosen_layout)

    # now for the real fun... iterating over everything

    foreground = str(foregroundVolume)

    # Track numbers follow layout order (same convention as the candidate
    # check), so parts sharing a name still get distinct tracks.
    for partid, part in enumerate(layouts[chosen_layout], start=1):
        partname = part[0]
        printv(partname, partid)

        partlabel = ''.join(part)
        filename = os.path.join(
            outdir, name + "_" + partlabel + os.path.extsep + 'mid')

        track = str(partid)
        old_patch = str(parts[partname][0])
        new_patch = str(parts[partname][1])

        # The marker record lets a later run detect processed files.
        this_csv = ["0, 0, Text_t, " + markerText] + csv

        for i, line in enumerate(this_csv):
            lsp = line.split(', ')

            # foreground volumes
            if (len(lsp) == 6 and lsp[0] == track
                    and lsp[2] == "Control_c" and lsp[4] == '7'):
                lsp[5] = foreground
                printv(lsp)

            # instrument swapsies
            if (len(lsp) == 5 and lsp[0] == track
                    and lsp[2] == "Program_c" and lsp[4] == old_patch):
                lsp[4] = new_patch
                printv(lsp)

            this_csv[i] = ', '.join(lsp)

        subprocess.run(['csvmidi', '-', filename],
                       input='\n'.join(this_csv).encode())

        if args.mp3:
            # swap the three-letter 'mid' extension for 'mp3'
            mp3_name = filename[0:-3] + "mp3"
            subprocess.run(['mscore', '-o', mp3_name, filename],
                           stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE)
    def perform_data_transformation(self):
        """Pivot per-date CSV files into per-trait CSV files.

        Every non-hidden file directly inside ``self.csv_date_path`` is read
        as a CSV; the part of its name before the first "." becomes a column
        label. The first two columns of each file are taken to be
        'Row IDX' and 'Column IDX'; every other column is a trait.

        For each trait two files named ``<trait>.csv`` are written:

        * ``<self.csv_path>/trait_csvs``: the two index columns followed by
          one column per input file (in sorted file-name order) holding
          that trait's values.
        * ``<self.csv_path>/relative_trait_csvs``: the same table with each
          value column replaced by its difference from the previous value
          column, and the first value column set to 0.

        Raises:
            FileNotFoundError: if the directory is missing or contains no
                non-hidden files.
        """
        # os.walk yields nothing for a missing directory, hence the default.
        _, _, files = next(os.walk(self.csv_date_path), (None, [], []))
        files = sorted(f for f in files if not f.startswith("."))
        if not files:
            raise FileNotFoundError(
                "no CSV files found in {!r}".format(self.csv_date_path))

        csvs = [
            pd.read_csv(os.path.join(self.csv_date_path, file))
            for file in files
        ]
        dates = [file.split(".")[0] for file in files]

        traits = [
            c for c in csvs[0].columns
            if c not in ('Row IDX', 'Column IDX')
        ]

        date_base = csvs[0].iloc[:, 0:2]
        col_names = ['Row IDX', 'Column IDX'] + dates

        # Trait t lives in positional column t + 2 of every per-date frame.
        trait_csvs = []
        for t in range(len(traits)):
            t_csv = pd.concat(
                [date_base] + [frame.iloc[:, t + 2] for frame in csvs],
                axis=1)
            t_csv.columns = col_names
            trait_csvs.append(t_csv)

        print(len(trait_csvs))
        print(len(traits))

        trait_csv_path = os.path.join(self.csv_path, "trait_csvs")
        rel_trait_path = os.path.join(self.csv_path, "relative_trait_csvs")
        os.makedirs(trait_csv_path, exist_ok=True)
        os.makedirs(rel_trait_path, exist_ok=True)

        for trait, t_csv in zip(traits, trait_csvs):
            t_csv.to_csv(path_or_buf=os.path.join(trait_csv_path,
                                                  trait + ".csv"),
                         index=False,
                         float_format='%.3f')

        rel_trait_csvs = []
        for t_csv in trait_csvs:
            csv_copy = t_csv.copy()
            # Walk from the last column down so each subtraction still sees
            # the untouched value of the column to its left. The bound comes
            # from the frame, so any number of dates works.
            for c in range(csv_copy.shape[1] - 1, 2, -1):
                csv_copy.iloc[:, c] = (csv_copy.iloc[:, c]
                                       - csv_copy.iloc[:, c - 1])
            csv_copy.iloc[:, 2] = 0
            rel_trait_csvs.append(csv_copy)

        for trait, rel_csv in zip(traits, rel_trait_csvs):
            rel_csv.to_csv(path_or_buf=os.path.join(rel_trait_path,
                                                    trait + ".csv"),
                           index=False,
                           float_format='%.3f')