Python Preferences Examples

Programming Language: Python

Namespace/Package Name: settings

Class/Type: Preferences

Examples at hotexamples.com: 19

Python Preferences - 19 examples found. These are the top rated real world Python examples of settings.Preferences extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Preferences(9)

get(6)

getwpath(6)

Example #1

Show file

File: altpre1sort-folder-of-torrents-by-tracker.py Project: genbtc/whatCD-API-utorrent-renamer

def main():
    ss = Preferences()  # settings.1py

    directory_path = os.path.join(
        ss.get("maindir"), u"All-Torrs\\"
    )  # needs a unicode symbol so os. commands work at all on paths with funny chars

    files = [
        os.path.join(directory_path, fn) for fn in next(os.walk(directory_path))[2]
    ]  # gives absolute paths + names

    torrentnamelist = []
    for eachfile in files:
        with open(eachfile, "rb") as stringfile:
            try:
                torrent = bencode.decode(stringfile.read())
                for key, value in torrent.iteritems():
                    if key == "announce":
                        announce = value
                        domain = "{uri.netloc}".format(uri=urlparse(announce))
                        colon = domain.find(":", 0)
                        if colon != -1:
                            domain = domain[:colon]
                        if domain:
                            tracker = domain  # only using 1 value here(lazy)
                    elif key == "announce-list":
                        tracker = "Multiple Trackers"
            except:
                tracker = "None"
        torrentfilename = eachfile[eachfile.rfind("\\") + 1 :]

        if not os.path.exists(directory_path + tracker):
            os.makedirs(directory_path + tracker)
        os.rename(eachfile, os.path.join(directory_path + tracker + "\\" + torrentfilename))

Example #2

Show file

def main():

    ss = Preferences()

    torrentlist = bencode.decode_from_file(ss.get("utresumedat"))
    partiallist = [
    ]  # set up an empty container for desired data to get put into for later

    fileguarduseless = torrentlist.pop(".fileguard", None)
    rec = torrentlist.pop("rec", None)  #Remove this.
    #(dict. comprehension expects only dicts as the root keys)
    #create a reverse lookup dict with "Dict comprehension". nice and simple eh? ;-)
    reverselookup = {
        base64.b16encode(value["info"]):
        [value["path"], value["caption"], origkey]
        for origkey, value in torrentlist.iteritems()
    }
    for thehash, value in reverselookup.iteritems():
        partiallist.append([value[0], value[1], thehash])

    partiallist.sort()
    writelistfile = open(
        os.path.join(ss.get("maindir"), "TorrentList.txt"),
        'wb')  # write-out a text file with one entry per line.
    for eachline in partiallist:
        writelistfile.write(eachline[0] + " / " + eachline[1] + " / " +
                            eachline[2] + "\n")
        #path           /   #caption          /     #infohash
    writelistfile.close()
    print "Finished writing: TorrentList.txt"

Example #3

Show file

File: 1analyze-folder-of-torrents.py Project: genbtc/whatCD-API-utorrent-renamer

def main():
    ss = Preferences()
    script1sourcedir = ss.getwpath("script1sourcedir")            #("seeding\")
    files = [os.path.join(script1sourcedir,filename) for filename in next(os.walk(script1sourcedir))[2]]        #gives absolute paths + names

    currentfile = 0

    container = []    #set up an empty container for desired data to get put into for later
    for eachfile in files:

        metainfo = decoder.decode_from_file(eachfile)
        # #need to manually SHA1 hash the torrent file's info-dict to get the info-hash
        infodict = metainfo[b'info']
        info_hash = hashlib.sha1(encode.encode(infodict)).hexdigest().upper()

        internalname = infodict[b'name']
        torrentfilename = eachfile[eachfile.rfind("\\")+1:]
        locextension = torrentfilename.find(".torrent")           #location of extension (char position)
        locid = torrentfilename.rfind("-")+1                      #location of torrentID (char position)
        torrentid = torrentfilename[locid:locextension]           #grab torrentID 
      
        container.append([torrentfilename, internalname, info_hash, torrentid])
        currentfile += 1 
        print(currentfile, torrentfilename.encode('ascii', errors='ignore').decode())        #console output is ascii only, cannot print unicode - chars are omitted

    #WRITE FILE 1
    writelistfile = codecs.open(ss.getwpath("outpath1"),'wb',"utf-8") # write-out a text file with torrentID and Hash (on one line) ("1seeding_ID+Hash+Filename.txt")
    for eachline in container:
        writelistfile.write(eachline[3] + " / " + eachline[2] + " / " + eachline[0] + "\n")     #output torrentID / Hash / torrentfilename
    writelistfile.close()

Example #4

Show file

def main():

    ss = Preferences()

    torrentlist = bencode.decode_from_file(ss.get("utresumedat"))
    partiallist = []    # set up an empty container for desired data to get put into for later

    fileguarduseless = torrentlist.pop(b".fileguard",None)
    rec = torrentlist.pop(b"rec",None)   #Remove this. 
    #(dict. comprehension expects only dicts as the root keys)
    #create a reverse lookup dict with "Dict comprehension". nice and simple eh? ;-)
    reverselookup={base64.b16encode(value[b"info"]):[value[b"path"],value[b"caption"],origkey] for origkey,value in torrentlist.items()}
    for thehash,value in reverselookup.items():
        partiallist.append([value[0].decode('utf-8'),value[1].decode('utf-8'),thehash.decode('utf-8')])
    #Those 3 lines replace all of this:
    # for key,value in torrentlist.items():
    #     sentinel = False    # reset before each while-loop
    #     if b"path" in value:
    #         path = value[b"path"].decode('utf-8')
    #     if b"caption" in value:
    #         caption = value[b"caption"].decode('utf-8')
    #     if b"info" in value:
    #         infoHash = base64.b16encode(value[b"info"]).decode('utf-8')
    #         sentinel = True  # need this because theres other dictionaries INside each file-entries' dict...
    #                          # and this will trigger the partiallist.append to write only file-entry dicts.
    #     if sentinel == True:
    #         partiallist.append([path,caption,infoHash])

    partiallist.sort()
    writelistfile = open(os.path.join(ss.get("maindir"),"TorrentList.txt"),'w',encoding='utf-8') # write-out a text file with one entry per line.
    for eachline in partiallist:
        writelistfile.write(eachline[0] + " / " + eachline[1] + " / " + eachline[2] + "\n")
        					#path 			/	#caption		  /		#infohash
    writelistfile.close()

Example #5

Show file

File: post3makelist-from-hashgrabs.py Project: genbtc/whatCD-API-utorrent-renamer

def main():

    ss = Preferences()
    directory_path = ss.getwpath("script3destdir")  #("hash-grabs-as-filenames" dir)
    allfiles = [os.path.join(directory_path,fn) for fn in next(os.walk(directory_path))[2]]        #gives absolute paths + names

    writelistfile = codecs.open(ss.getwpath("outpath3"), 'wb', "utf-8") #("3propernames.txt" file)
    for hashidfilename in allfiles:  #iterate through filenames of what.cd JSON data
        with open(hashidfilename,'r') as stringfile: #open them
            response = json.load(stringfile)
            torrentHash= response["torrent"]["infoHash"]     #grab the hash To compare.            
            writelistfile.write(hashidfilename[hashidfilename.rfind("\\")+1:] + " / " + torrentHash + "\n")            #File Output. The Master List file of the names and hashes.
    writelistfile.close()

Example #6

Show file

File: read-BencResumedat.py Project: genbtc/whatCD-API-utorrent-renamer

def main():

    ss = Preferences()

    torrentlist = bencode.decode_from_file(ss.get("utresumedat"))
    partiallist = []    # set up an empty container for desired data to get put into for later

    fileguarduseless = torrentlist.pop(".fileguard",None)
    rec = torrentlist.pop("rec",None)   #Remove this. 
    #(dict. comprehension expects only dicts as the root keys)
    #create a reverse lookup dict with "Dict comprehension". nice and simple eh? ;-)
    reverselookup={base64.b16encode(value["info"]):[value["path"],value["caption"],origkey] for origkey,value in torrentlist.iteritems()}
    for thehash,value in reverselookup.iteritems():
        partiallist.append([value[0],value[1],thehash])

    partiallist.sort()
    writelistfile = open(os.path.join(ss.get("maindir"),"TorrentList.txt"),'wb') # write-out a text file with one entry per line.
    for eachline in partiallist:
        writelistfile.write(eachline[0] + " / " + eachline[1] + " / " + eachline[2] + "\n")
                            #path           /   #caption          /     #infohash
    writelistfile.close()
    print "Finished writing: TorrentList.txt"

Example #7

Show file

File: altpre1sort-folder-of-torrents-by-tracker.py Project: genbtc/whatCD-API-utorrent-renamer

def main():
    ss = Preferences()  #settings.1py

    directory_path = os.path.join(
        ss.get("maindir"), u"All-Torrs\\"
    )  #needs a unicode symbol so os. commands work at all on paths with funny chars

    files = [
        os.path.join(directory_path, fn)
        for fn in next(os.walk(directory_path))[2]
    ]  #gives absolute paths + names

    torrentnamelist = []
    for eachfile in files:
        with open(eachfile, 'rb') as stringfile:
            try:
                torrent = bencode.decode(stringfile.read())
                for key, value in torrent.iteritems():
                    if key == "announce":
                        announce = value
                        domain = '{uri.netloc}'.format(uri=urlparse(announce))
                        colon = domain.find(':', 0)
                        if colon != -1:
                            domain = domain[:colon]
                        if domain:
                            tracker = domain  #only using 1 value here(lazy)
                    elif key == "announce-list":
                        tracker = "Multiple Trackers"
            except:
                tracker = "None"
        torrentfilename = eachfile[eachfile.rfind("\\") + 1:]

        if not os.path.exists(directory_path + tracker):
            os.makedirs(directory_path + tracker)
        os.rename(
            eachfile,
            os.path.join(directory_path + tracker + "\\" + torrentfilename))

Example #8

Show file

def main():
    ss = Preferences()

    currentline = 0  #to resume a broken download. set this to the last SUCCESSFUL number (due to 1 starting at 0) that you see was outputted to console
    try:
        cookies = pickle.load(
            open(ss.getwpath("cookiesfile"),
                 'rb'))  #cookies speed up the HTTP (supposedly)
    except:
        cookies = None  #if we cant load it, don't use it.
    credentials = open(ss.getwpath("credentialsfile"), 'rb').readlines(
    )  #store credentials in another file and .git-ignore it
    username = credentials[0].strip()
    password = credentials[1].strip()

    apihandle = whatapi.WhatAPI(config_file=None,
                                username=username,
                                password=password,
                                cookies=cookies)

    filenamewithIDs = ss.getwpath(
        "outpath1")  # ("1seeding_ID+Hash+Filename.txt")
    hashdir = ss.getwpath("script2destdir")  #output dir

    openedfile = open(filenamewithIDs, 'r', encoding='utf-8').readlines()
    for eachline in islice(openedfile, currentline,
                           None):  #will continue where it left off
        idandhash = eachline.strip().split(' / ')
        currentID = idandhash[0]
        currentHash = idandhash[1]
        if not os.path.exists(os.path.join(hashdir, currentHash)):
            #currentHash = "E7A5718EC52633FCCB1EA85656AA0622543994D7"   #test hash for debugging
            try:
                response = apihandle.request(1.75, "torrent", id=currentID)[
                    "response"]  #talk to server and receive a response
            except whatapi.RequestException as e:
                currentline += 1
                print(currentline,
                      " ERROR. Your search did not match anything.")
                continue
            with open(os.path.join(hashdir, currentHash), 'w') as outfile:
                json.dump(response, outfile, sort_keys=True)
            currentline += 1
            print(currentline, ": ", currentID)

    pickle.dump(apihandle.session.cookies,
                open(ss.getwpath("cookiesfile"),
                     'wb'))  #store cookies when script ends, for next-run.
    print("Download Complete.")

Example #9

Show file

File: alt2WhatCD-API_Downloader(by Hash).py Project: genbtc/whatCD-API-utorrent-renamer

def main():
    ss = Preferences()

    currentline = (
        0
    )  # to resume a broken download. set this to the last SUCCESSFUL number (due to 1 starting at 0) that you see was outputted to console

    try:
        cookies = pickle.load(open(ss.getwpath("cookiesfile"), "rb"))  # cookies speed up the HTTP (supposedly)
    except:
        cookies = None  # if we cant load it, don't use it.
    credentials = open(
        ss.getwpath("credentialsfile"), "rb"
    ).readlines()  # store credentials in another file and .git-ignore it
    username = credentials[0].strip()
    password = credentials[1].strip()

    apihandle = whatapi.WhatAPI(config_file=None, username=username, password=password, cookies=cookies)

    filenamewithIDs = ss.getwpath("outpath1")  # ("1seeding_ID+Hash+Filename.txt")
    hashdir = ss.getwpath("script2destdir")  # output dir

    openedfile = open(filenamewithIDs, "r").readlines()
    for eachline in islice(openedfile, currentline, None):  # will continue where it left off
        idandhash = eachline.strip().split(" / ")
        currentID = idandhash[0]
        currentHash = idandhash[1]
        if not os.path.exists(os.path.join(hashdir, currentHash)):
            # currentHash = "E7A5718EC52633FCCB1EA85656AA0622543994D7"   #test hash for debugging
            try:
                response = apihandle.request(0, "torrent", hash=currentHash)[
                    "response"
                ]  # talk to server and receive a response. the 0 means time.sleep(0).
            except whatapi.RequestException as e:
                currentline += 1
                print currentline, " ERROR. Your search did not match anything."
                continue
            with open(os.path.join(hashdir, currentHash), "w") as outfile:
                json.dump(response, outfile, sort_keys=True)
            currentline += 1
            print currentline, ": ", currentHash

    pickle.dump(
        apihandle.session.cookies, open(ss.getwpath("cookiesfile"), "wb")
    )  # store cookies when script ends, for next-run.
    print "Download Complete."

Example #10

Show file

def main():
    ss = Preferences()
    script1sourcedir = ss.getwpath(
        u"script1sourcedir"
    ) + u''  #("seeding\"), needs unicode u for file opening.
    files = [
        os.path.join(script1sourcedir, filename)
        for filename in next(os.walk(script1sourcedir))[2]
    ]  #gives absolute paths + names

    currentfile = 0

    container = [
    ]  #set up an empty container for desired data to get put into for later
    for eachfile in files:

        metainfo = bencode.decode_from_file(eachfile)
        # #need to manually SHA1 hash the torrent file's info-dict to get the info-hash
        infodict = metainfo['info']
        info_hash = hashlib.sha1(bencode.bencode(infodict)).hexdigest().upper()

        internalname = infodict['name']
        torrentfilename = eachfile[eachfile.rfind("\\") + 1:]
        locextension = torrentfilename.find(
            ".torrent")  #location of extension (char position)
        locid = torrentfilename.rfind(
            "-") + 1  #location of torrentID (char position)
        torrentid = torrentfilename[locid:locextension]  #grab torrentID

        torrentfilename = torrentfilename[:locid - 1]

        #####-------------replace banned characters with unicode section-----------------######
        ###
        # Forward slashes are strange. "FullWidth" is very wide and would be too wide if theres already spaces around it.
        torrentfilename = torrentfilename.replace(
            " / ", u"／")  # U+FFOF  (wide)       FULLWIDTH SOLIDUS
        # "Division" slash is too narrow and needs spaces inserted surrounding it (and is still less width than the fullwidth)
        torrentfilename = torrentfilename.replace(
            "/", u" ∕ ")  # U+2215  (narrow)     DIVISION SLASH
        # Backslash (requires two slashes in python)
        torrentfilename = torrentfilename.replace(
            "\\", u"＼")  # U+FF3C               FULLWIDTH REVERSE SOLIDUS
        # Colon
        torrentfilename = torrentfilename.replace(
            ":", u"꞉")  # U+A789               MODIFIER LETTER COLON
        # asterisk
        torrentfilename = torrentfilename.replace(
            "*", u"※")  # U+203B               REFERENCE MARK
        # question mark (replacement is backwards, sorry)
        torrentfilename = torrentfilename.replace(
            "?", u"؟")  # U+061F               ARABIC QUESTION MARK
        # Double-quote
        torrentfilename = torrentfilename.replace(
            '"', u"ʺ")  # U+02BA               MODIFIER LETTER DOUBLE PRIME
        # Left angle bracket
        torrentfilename = torrentfilename.replace(
            "<", u"˂")  # U+02C2               MODIFIER LETTER LEFT ARROWHEAD
        # right angle bracket
        torrentfilename = torrentfilename.replace(
            ">", u"˃")  # U+02C3               MODIFIER LETTER RIGHT ARROWHEAD
        # Pipe
        torrentfilename = torrentfilename.replace(
            "|", u"ǀ")  # U+01C0               LATIN LETTER DENTAL CLICK
        ###
        #####----------windows filename banned chars replacement with unicode-----------######

        container.append([torrentfilename, internalname, info_hash, torrentid])
        currentfile += 1
        print currentfile, torrentfilename.encode('ascii', errors='ignore')

    print "\nReminder: Console output is ascii only, Cannot Print Unicode. (chars omitted)"
    ##File Output. The Master List file of everything.##
    # when the loop exits, Sort it, and write it to the file.
    container.sort()
    writelistfile = codecs.open(
        ss.getwpath("outpath3"), 'wb', "utf-8"
    )  # write-out a text file with one entry per line. main output file (3propernames.txt)
    for eachline in container:
        writelistfile.write(eachline[0] + " / " + eachline[2] +
                            "\n")  #torrentname  / infohash
    writelistfile.close()
    print "Completed. Unicode File Written to: ", os.path.basename(
        ss.getwpath("outpath3"))

Example #11

Show file

    def _calc_terminal_scores(self, w):
        """ Calculate the score for each possible terminal/token match """

        # First pass: for each token, find the possible terminals that
        # can correspond to that token
        finals = defaultdict(set)
        tokens = dict()
        self._find_options(w, finals, tokens)

        # Second pass: find a (partial) ordering by scoring the terminal alternatives for each token
        scores = dict()

        # Loop through the indices of the tokens spanned by this tree
        for i in range(w.start, w.end):

            s = finals[i]
            # Initially, each alternative has a score of 0
            scores[i] = {terminal: 0 for terminal in s}

            #print("Reducing token '{0}'; scores dict initialized to:\n{1}".format(tokens[i].t1, scores[i]))

            if len(s) <= 1:
                # No ambiguity to resolve here
                continue

            # More than one terminal in the option set for the token at index i
            # Calculate the relative scores
            # Find out whether the first part of all the terminals are the same
            same_first = len(set(terminal.first for terminal in s)) == 1
            txt = tokens[i].lower
            # No need to check preferences if the first parts of all possible terminals are equal
            # Look up the preference ordering from Reynir.conf, if any
            prefs = None if same_first else Preferences.get(txt)
            found_pref = False
            sc = scores[i]
            if prefs:
                adj_worse = defaultdict(int)
                adj_better = defaultdict(int)
                for worse, better, factor in prefs:
                    for wt in s:
                        if wt.first in worse:
                            for bt in s:
                                if wt is not bt and bt.first in better:
                                    if bt.name[0] in "\"'":
                                        # Literal terminal: be even more aggressive in promoting it
                                        adj_w = -2 * factor
                                        adj_b = +6 * factor
                                    else:
                                        adj_w = -2 * factor
                                        adj_b = +4 * factor
                                    adj_worse[wt] = min(adj_worse[wt], adj_w)
                                    adj_better[bt] = max(adj_better[bt], adj_b)
                                    found_pref = True
                for wt, adj in adj_worse.items():
                    #print("Token '{2}': Adjusting score of terminal '{0}' by {1}".format(wt, adj, txt))
                    sc[wt] += adj
                for bt, adj in adj_better.items():
                    #print("Token '{2}': Adjusting score of terminal '{0}' by {1}".format(bt, adj, txt))
                    sc[bt] += adj
            #if not same_first and not found_pref:
            #    # Only display cases where there might be a missing pref
            #    print("Token '{0}' has {1} possible terminal matches: {2}".format(txt, len(s), s))

            # Apply heuristics to each terminal that potentially matches this token
            for t in s:
                tfirst = t.first
                if tfirst == "ao" or tfirst == "eo":
                    # Subtract from the score of all ao and eo
                    sc[t] -= 1
                elif tfirst == "no":
                    if t.is_singular:
                        # Add to singular nouns relative to plural ones
                        sc[t] += 1
                    elif t.is_abbrev:
                        # Punish abbreviations in favor of other more specific terminals
                        sc[t] -= 1
                elif tfirst == "fs":
                    if t.has_variant("nf"):
                        # Reduce the weight of the 'artificial' nominative prepositions
                        # 'næstum', 'sem', 'um'
                        sc[t] -= 5  # Make other cases outweigh the Nl_nf bonus of +4 (-2 -3 = -5)
                    elif txt == "við" and t.has_variant("þgf"):
                        sc[t] += 1  # Smaller bonus for við + þgf (is rarer than við + þf)
                    elif txt == "sem" and t.has_variant("þf"):
                        sc[t] -= 6  # Even less attractive than sem_nf
                    else:
                        # Else, give a bonus for each matched preposition
                        sc[t] += 2
                elif tfirst == "so":
                    if t.variant(0) in "012":
                        # Consider verb arguments
                        # Normally, we give a bonus for verb arguments: the more matched, the better
                        numcases = int(t.variant(0))
                        adj = 2 * numcases
                        # !!! Logic should be added here to encourage zero arguments for verbs in 'miðmynd'
                        if numcases == 0:
                            # Zero arguments: we might not like this
                            if all((m.stofn not in VerbObjects.VERBS[0]) and (
                                    "MM" not in m.beyging)
                                   for m in tokens[i].t2 if m.ordfl == "so"):
                                # No meaning where the verb has zero arguments
                                adj = -5
                        # Apply score adjustments for verbs with particular object cases,
                        # as specified by $score(n) pragmas in Verbs.conf
                        # In the (rare) cases where there are conflicting scores,
                        # apply the most positive adjustment
                        adjmax = 0
                        for m in tokens[i].t2:
                            if m.ordfl == "so":
                                key = m.stofn + t.verb_cases
                                score = VerbObjects.SCORES.get(key)
                                if score is not None:
                                    adjmax = score
                                    break
                        sc[t] += adj + adjmax
                    if t.is_sagnb:
                        # We like sagnb and lh, it means that more
                        # than one piece clicks into place
                        sc[t] += 6
                    elif t.is_lh:
                        # sagnb is preferred to lh, but vb (veik beyging) is discouraged
                        if t.has_variant("vb"):
                            sc[t] -= 2
                        else:
                            sc[t] += 3
                    elif t.is_mm:
                        # Encourage mm forms. The encouragement should be better than
                        # the score for matching a single case, so we pick so_0_mm
                        # rather than so_1_þgf, for instance.
                        sc[t] += 3
                    elif t.is_vh:
                        # Encourage vh forms
                        sc[t] += 2
                    if t.is_subj:
                        # Give a small bonus for subject matches
                        if t.has_variant("none"):
                            # ... but a punishment for subj_none
                            sc[t] -= 3
                        else:
                            sc[t] += 1
                    if t.is_nh:
                        if (i > 0) and any(pt.first == 'nhm'
                                           for pt in finals[i - 1]):
                            # Give a bonus for adjacent nhm + so_nh terminals
                            sc[t] += 4  # Prop up the verb terminal with the nh variant
                            for pt in scores[i - 1].keys():
                                if pt.first == 'nhm':
                                    # Prop up the nhm terminal
                                    scores[i - 1][pt] += 2
                                    # print("Propping up nhm for verb {1}, score is now {0}".format(scores[i-1][pt], tokens[i].t1))
                                    break
                        if any(pt.first == "no" and pt.has_variant("ef")
                               and pt.is_plural for pt in s):
                            # If this is a so_nh and an alternative no_ef_ft exists, choose this one
                            # (for example, 'hafa', 'vera', 'gera', 'fara', 'mynda', 'berja', 'borða')
                            sc[t] += 4
                elif tfirst == "tala" or tfirst == "töl":
                    # A complete 'töl' or 'no' is better (has more info) than a rough 'tala'
                    if tfirst == "tala":
                        sc[t] -= 1
                    # Discourage possessive ('ef') meanings for numbers
                    for pt in s:
                        if (pt.first == "no"
                                or pt.first == "töl") and pt.has_variant("ef"):
                            sc[pt] -= 1
                elif tfirst == "sérnafn":
                    if not tokens[i].t2:
                        # If there are no BÍN meanings, we had no choice but to use sérnafn,
                        # so alleviate some of the penalty given by the grammar
                        sc[t] += 2
                    else:
                        # BÍN meanings are available: discourage this
                        #print("sérnafn '{0}': BÍN meanings available, discouraging".format(tokens[i].t1))
                        sc[t] -= 6
                        if i == w.start:
                            # First token in sentence, and we have BÍN meanings:
                            # further discourage this
                            sc[t] -= 4
                        #print("Meanings for sérnafn {0}:".format(tokens[i].t1))
                        #for m in tokens[i].t2:
                        #    print("{0}".format(m))
                    #        if m.stofn[0].isupper():
                    #            sc[t] -= 4 # Discourage 'sérnafn' if an uppercase BÍN meaning is available
                    #            break
                elif t.name[0] in "\"'":
                    # Give a bonus for exact or semi-exact matches
                    sc[t] += 1

        #for i in range(w.start, w.end):
        #    print("At token '{0}' scores dict is:\n{1}".format(tokens[i].t1, scores[i]))
        return scores

Example #12

Show file

File: reducer.py Project: halldor/Reynir

    def go_with_score(self, forest):

        """ Returns the argument forest after pruning it down to a single tree """

        if forest is None:
            return (None, 0)
        w = forest

        # First pass: for each token, find the possible terminals that
        # can correspond to that token
        finals = defaultdict(set)
        tokens = dict()
        self._find_options(w, finals, tokens)

        # Second pass: find a (partial) ordering by scoring the terminal alternatives for each token
        scores = dict()

        # Loop through the indices of the tokens spanned by this tree
        for i in range(w.start, w.end):

            s = finals[i]
            # Initially, each alternative has a score of 0
            scores[i] = { terminal: 0 for terminal in s }
            if len(s) > 1:
                # More than one terminal in the option set
                # Calculate the relative scores
                # Find out whether the first part of all the terminals are the same
                same_first = len(set(x.first for x in s)) == 1
                txt = tokens[i].lower
                # No need to check preferences if the first parts of all possible terminals are equal
                # Look up the preference ordering from Reynir.conf, if any
                prefs = None if same_first else Preferences.get(txt)
                found_pref = False
                sc = scores[i]
                if prefs:
                    for worse, better, factor in prefs:
                        for wt in s:
                            if wt.first in worse:
                                for bt in s:
                                    if wt is not bt and bt.first in better:
                                        if bt.name[0] in "\"'":
                                            # Literal terminal: be even more aggressive in promoting it
                                            sc[wt] -= 2 * factor
                                            sc[bt] += 6 * factor
                                        else:
                                            sc[wt] -= 2 * factor
                                            sc[bt] += 4 * factor
                                        found_pref = True
                #if not same_first and not found_pref:
                #    # Only display cases where there might be a missing pref
                #    print("Token '{0}' has {1} possible terminal matches: {2}".format(txt, len(s), s))

                # Apply heuristics to each terminal that potentially matches this token
                for t in s:
                    tfirst = t.first
                    if tfirst == "ao" or tfirst == "eo":
                        # Subtract from the score of all ao and eo
                        sc[t] -= 1
                    elif tfirst == "no":
                        if t.is_singular:
                            # Add to singular nouns relative to plural ones
                            sc[t] += 1
                        elif t.is_abbrev:
                            # Punish abbreviations in favor of other more specific terminals
                            sc[t] -= 1
                    elif tfirst == "fs":
                        if t.has_variant("nf"):
                            # Reduce the weight of the 'artificial' nominative prepositions
                            # 'næstum', 'sem', 'um'
                            sc[t] -= 3 # Make other cases outweigh the Nl_nf bonus of +4 (-2 -3 = -5)
                        else:
                            # Else, give a bonus for each matched preposition
                            sc[t] += 2
                    elif tfirst == "so":
                        if t.variant(0) in "012":
                            # Consider verb arguments
                            # Normally, we give a bonus for verb arguments: the more matched, the better
                            adj = 2 * int(t.variant(0))
                            # !!! Logic should be added here to encourage zero arguments for verbs in 'miðmynd'
                            if adj == 0:
                                # Zero arguments: we might not like this
                                for m in tokens[i].t2:
                                    if m.ordfl == "so" and m.stofn not in VerbObjects.VERBS[0]:
                                        # We're using a verb with zero arguments but that form is not
                                        # explicitly listed in Verbs.conf: discourage this
                                        # print("Discouraging zero-arg use of verb '{0}' (stem '{1}')".format(txt, m.stofn))
                                        adj = -1
                                        break
                            sc[t] += adj
                        if t.is_sagnb:
                            # We like sagnb and lh, it means that more
                            # than one piece clicks into place
                            sc[t] += 4
                        elif t.is_lh:
                            # sagnb is preferred to lh, but vb (veik beyging) is discouraged
                            if t.has_variant("vb"):
                                sc[t] -= 2
                            else:
                                sc[t] += 3
                        if t.is_subj:
                            # Give a small bonus for subject matches
                            if t.has_variant("none"):
                                # ... but a punishment for subj_none
                                sc[t] -= 3
                            else:
                                sc[t] += 1
                        if t.is_nh:
                            if (i > 0) and any(pt.first == 'nhm' for pt in finals[i - 1]):
                                # Give a bonus for adjacent nhm + so_nh terminals
                                sc[t] += 2 # Prop up the verb terminal with the nh variant
                                for pt in scores[i - 1].keys():
                                    if pt.first == 'nhm':
                                        # Prop up the nhm terminal
                                        scores[i - 1][pt] += 2
                            if any(pt.first == "no" and pt.has_variant("ef") and pt.is_plural for pt in s):
                                # If this is a so_nh and an alternative no_ef_ft exists, choose this one
                                # (for example, 'hafa', 'vera', 'gera', 'fara', 'mynda', 'berja', 'borða')
                                sc[t] += 2
                    elif tfirst == "tala" or tfirst == "töl":
                        # A complete 'töl' or 'no' is better (has more info) than a rough 'tala'
                        if tfirst == "tala":
                            sc[t] -= 1
                        # Discourage possessive ('ef') meanings for numbers
                        for pt in s:
                            if (pt.first == "no" or pt.first == "töl") and pt.has_variant("ef"):
                                sc[pt] -= 1
                    elif tfirst == "sérnafn":
                        if tokens[i].t2:
                            sc[t] -= 20 # Base penalty is -20
                            for m in tokens[i].t2:
                                sc[t] -= 1 # Subtract one for each BÍN meaning available
                                if m.stofn[0].isupper():
                                    sc[t] -= 8 # Heavily discourage 'sérnafn' if an uppercase BÍN meaning is available
                    elif t.name[0] in "\"'":
                        # Give a bonus for exact or semi-exact matches
                        sc[t] += 1

        # Third pass: navigate the tree bottom-up, eliminating lower-rated
        # options (subtrees) in favor of higher rated ones

        score = self._reduce(w, scores)

        return (w, score)

Example #13

Show file

def main():

    global fmttdcatalogueNumber  #to fix an issue with scope (line 330,333).

    ss = Preferences()
    hashtorrlistfile = ss.getwpath(
        "outpath1")  #("1seeding_ID+Hash+Filename.txt")
    directory_path = ss.getwpath("script2destdir")  #as source dir (hash-grabs)
    allfiles = [
        os.path.join(directory_path, filename)
        for filename in next(os.walk(directory_path))[2]
    ]  # gives absolute paths + names

    hashtofilenamefolder = ss.getwpath(
        "script3destdir")  #as dest dir (hash-grabs-as-filenames)
    writelistfile = codecs.open(
        ss.getwpath("outpath3"), 'wb', "utf-8"
    )  # write-out a text file with one entry per line. main output file (3propernames.txt)

    writelistcontainer = []
    currentfilenumber = 1

    for hashidfilename in allfiles:  # iterate through filenames of what.cd JSON data

        with open(hashidfilename, 'r') as stringfile:  # open them
            needFixLabeltoNewEdition = False

            jsonresponse = json.load(stringfile)
            tor = Torrent(jsonresponse)

            if tor.group.categoryName != "Music":
                continue  # do not continue altering any non-music torrents.

            releaseTypeName = ReleaseType(
                tor.group.releaseType
            ).name  # turn int. value into a string using the enum class above
            fmttdreleaseTypeName = "[" + releaseTypeName + "]"

            if tor.torrent.remastered:
                if tor.torrent.remasterTitle:
                    tor.group.name += " (" + tor.torrent.remasterTitle + ")"
                if tor.torrent.remasterYear > tor.group.year:
                    tor.group.year = tor.torrent.remasterYear
                if tor.torrent.remasterRecordLabel:
                    if tor.group.recordLabel.lower(
                    ) != tor.torrent.remasterRecordLabel.lower(
                    ):  # so not case sensitive
                        if not tor.group.recordLabel:
                            tor.group.recordLabel = tor.torrent.remasterRecordLabel
                        else:
                            # then things get complicated and we need to figure out which Label/catalog field is the best one to use, or combine them or both
                            needFixLabeltoNewEdition = True

                # if its been determined that its a remaster (new edition), process new label and catalog
                #  checking whether to combine with old, or which one to use, etc, etc.....
                if needFixLabeltoNewEdition == True:

                    score = difflib.SequenceMatcher(
                        None, tor.group.recordLabel.lower(),
                        tor.torrent.remasterRecordLabel.lower()).ratio()
                    if (score < 0.5):
                        # considered similar at 0.6 but this way is not that accurate. if they are lower than 0.5 similar, just use the new one
                        tor.group.recordLabel = tor.torrent.remasterRecordLabel
                        if tor.torrent.remasterCatalogueNumber:
                            tor.group.catalogueNumber = tor.torrent.remasterCatalogueNumber  # if tor.torrent.remasterCatalogueNumber is not blank, use it as the new tor.group.catalogueNumber

                    elif all(word in tor.torrent.remasterRecordLabel.lower() for word in tor.group.recordLabel.lower()) \
                            or \
                            all(word in tor.group.recordLabel.lower() for word in tor.torrent.remasterRecordLabel.lower()):
                        # If all the words in the old label is encompassed in the new one, use the new one.
                        # This would mean the new edition record label is most likely longer and is similar enough to use that,
                        # and preferred, since its more applicable to this specific release anyway.
                        # Even if the reverse is true, this code-block should only catch labels that differ in slight ways.(?)

                        # example 1: originallabel={Big Beat Records}  remasterlabel={Big Beat}          #elif new label in old label
                        #   result:     {Big Beat}                                                      #or
                        # example 2: originallabel={Big Beat}  remasterlabel={Big Beat Records}          #if old label in new label
                        #   result:     {Big Beat Records}                                              #then
                        tor.group.recordLabel = tor.torrent.remasterRecordLabel  # always choose new label.
                        if tor.torrent.remasterCatalogueNumber:
                            tor.group.catalogueNumber = tor.torrent.remasterCatalogueNumber  # if tor.torrent.remasterCatalogueNumber is not blank, use it as the new tor.group.catalogueNumber
                    else:
                        # This else-block is used when the above are not true, and we can't decide on which one to use, so we use both. Combined.

                        # example 1: originallabel={Island Records}  remasterlabel={Island Records / Lokal Legend}
                        #   result:     {Island Records / Lokal Legend}
                        # example 2: originallabel {Wall Recordings}  remasterlabel={Tiger Records}
                        #   result:     {Wall Recordings / Tiger Records}
                        new = ""
                        splitorig = re.sub(
                            "[(,),-]", " ", tor.group.recordLabel).split(
                            )  # remove delimiter chars that mess up stuff
                        sepchar = ["/"]
                        splitnew = re.sub(
                            "[(,),-]", " ", tor.torrent.remasterRecordLabel
                        ).split()  # turn everything into a list

                        new = " ".join([
                            "%s" % (v)
                            for v in getUniqueWords(splitorig + sepchar +
                                                    splitnew)
                        ])  # append unique words to the orig.

                        if tor.torrent.remasterCatalogueNumber:
                            if tor.group.catalogueNumber != tor.torrent.remasterCatalogueNumber:
                                if tor.group.catalogueNumber:
                                    tor.group.catalogueNumber += " / " + tor.torrent.remasterCatalogueNumber
                                else:
                                    tor.group.catalogueNumber = tor.torrent.remasterCatalogueNumber
                        tor.group.recordLabel = new

            # ntpath.basename was really slow so doing it manually.... (32 times faster)= 0.128 seconds vs 0.004 seconds
            # whats happening here is due to an exponential nested for loop, ie: 4129 results^2 = 17 million function calls of either basename or .rfind('\\')
            # there should be a better way to do this.
            hashfilesepidloc = hashidfilename.rfind("\\") + 1
            cmphashfn = hashidfilename[hashfilesepidloc:]
            iterhashfile = open(
                hashtorrlistfile,
                'rb').readlines()  # read everything into memory
            for i in iterhashfile:  # read line
                splitline = i.strip().split(
                    ' / ')  # 0 torrentID / 1 Hash / 2 torrentfilename
                if splitline[
                        1] == cmphashfn:  # if it matches, start processing
                    newEntry = TorrentEntry()  # instanciate class
                    newEntry.hash = splitline[1]  # store Hash for reference
                    newEntry.pathname = splitline[2].decode(
                        "utf-8")  # filename + extension
                    locextension = newEntry.pathname.find(
                        ".torrent")  # location of extension
                    locid = newEntry.pathname.rfind(
                        "-") + 1  # location of tor.torrent.id
                    newEntry.filename = newEntry.pathname[:
                                                          locextension]  # chop the extension off (manually)
                    newEntry.artistalbum = newEntry.filename[:locid -
                                                             1]  # JUST the name (no ID#)
                    newEntry.torrentid = newEntry.filename[
                        locid:
                        locextension]  # grab ID for future reference (tor.torrent.id on what.cd)
                    # example : S-Type - Billboard (Lido Remix) - 2014 (WEB - MP3 - 320)
                    newEntry.artist = newEntry.artistalbum[:newEntry.
                                                           artistalbum.find(
                                                               " - "
                                                           )]  # grab artist
                    tempalbum = newEntry.artistalbum[
                        newEntry.artistalbum.find(" - ") +
                        3:]  # temp value helps with string processing
                    newEntry.album = tempalbum[:tempalbum.find(
                        " - "
                    )]  # not needed since it can be pulled from [group]
                    newEntry.year = tempalbum[
                        tempalbum.find(" - ") + 3:tempalbum.find(" - ") +
                        7]  # not needed since it can be pulled from [group]

                    # ------------Recreate name------------#
                    # -------Special RULES SECTION---------#
                    newEntry.createdpropername = newEntry.artist + u" - " + tor.group.name + " "
                    if tor.group.releaseType > 1:  # dont put it for Album or Unspecified
                        if tor.group.releaseType != 5:  # do something different for EP
                            newEntry.createdpropername += fmttdreleaseTypeName + " "
                        else:  # make a rule so [EP] doesnt come up if there is " EP " already
                            if tor.group.name[-2:] != "EP":
                                newEntry.createdpropername += fmttdreleaseTypeName + " "

                    newEntry.createdpropername += "(" + str(
                        tor.group.year) + ")"

                    # written like this for easy humanreading
                    #           format = MP3, FLAC, AAC,
                    #          media = cd, web, vinyl, soundboard, dat
                    #        encoding = lossless,320,v0,256,v2,192
                    if tor.torrent.format == "FLAC":
                        newEntry.fmttdMediaEncodeFormat = "FLAC"
                        # log and logscore only applicable to flac.
                        if tor.torrent.hasLog:
                            newEntry.fmttdMediaEncodeFormat += " " + str(
                                tor.torrent.logScore
                            ) + "%"  # the % implies "log" so leave out the word log
                    if tor.torrent.format == "AAC":
                        if (any("itunes" in word.lower()
                                for word in tor.torrent.description.split())
                            ) or (any(
                                "itunes" in word.lower()
                                for word in tor.torrent.filePath.split())):
                            newEntry.fmttdMediaEncodeFormat = "iTunes "
                        newEntry.fmttdMediaEncodeFormat += "AAC"
                    if tor.torrent.format == "MP3":
                        # dont actually write mp3
                        # only write Scene or WEB if it is an mp3
                        if tor.torrent.scene:
                            newEntry.fmttdMediaEncodeFormat += "Scene"
                        elif tor.torrent.media == "WEB":
                            newEntry.fmttdMediaEncodeFormat += "WEB"
                        else:
                            newEntry.fmttdMediaEncodeFormat += tor.torrent.media
                        if "VBR" in tor.torrent.encoding:
                            newEntry.fmttdMediaEncodeFormat += " " + tor.torrent.encoding[:
                                                                                          2]
                        else:
                            newEntry.fmttdMediaEncodeFormat += " " + tor.torrent.encoding

                    newEntry.fmttdMediaEncodeFormat = "[" + newEntry.fmttdMediaEncodeFormat + "]"

                    newEntry.createdpropername += " " + newEntry.fmttdMediaEncodeFormat

                    # put catalog number in brackets
                    if tor.group.catalogueNumber:
                        fmttdcatalogueNumber = ("[" +
                                                tor.group.catalogueNumber +
                                                "]").replace(" ", "").upper()
                    elif tor.group.recordLabel:
                        fmttdcatalogueNumber = "[" + tor.group.recordLabel + "]"  # combines with next part to put recordLabel in the front if Cat# missing

                    if any(word in tor.group.recordLabel.lower() for word in
                           Sorted_Record_Labels_List):  # so not case sensitive
                        newEntry.createdpropername = fmttdcatalogueNumber + " " + newEntry.createdpropername
                    elif tor.group.recordLabel:
                        newEntry.createdpropername += " " + "{" + tor.group.recordLabel + "}"
                        # if tor.group.catalogueNumber:                                                       #This will put [CATA###] after all releases, even labels not in your list
                        #     newEntry.createdpropername += " " + fmttdcatalogueNumber              #Gets kind of cumbersome for me. (uncomment to use it anyway)

                    # these 2 lines are a quick fix, for an oversight in my naming process
                    # if these are single file .mp3's (or a single .flac) they will need a .mp3 at the end of the filename
                    if not tor.torrent.filePath:
                        newEntry.createdpropername += "." + tor.torrent.format.lower(
                        )

                    try:
                        print currentfilenumber, newEntry.createdpropername.encode(
                            'ascii', errors='ignore')
                    except:
                        print "COULD NOT PRINT UNICODE FILENAME TO CONSOLE. HASH=", tor.torrent.infoHash

                    ########-------------replace characters section----------------#########
                    newEntry.createdpropername = newEntry.createdpropername.replace(
                        "\\",
                        u"＼")  # U+FF3C               FULLWIDTH REVERSE SOLIDUS
                    # these forward slashes are strange. "FullWidth" is very wide and would be too wide if theres already spaces around it.
                    newEntry.createdpropername = newEntry.createdpropername.replace(
                        " / ", u"／")  # U+FFOF  (wide)       FULLWIDTH SOLIDUS
                    # "Division" slash is too narrow and needs spaces inserted surrounding it (and is still less width than the fullwidth)
                    newEntry.createdpropername = newEntry.createdpropername.replace(
                        "/", u" ∕ ")  # U+2215  (narrow)     DIVISION SLASH
                    newEntry.createdpropername = newEntry.createdpropername.replace(
                        ":",
                        u"꞉")  # U+A789               MODIFIER LETTER COLON
                    newEntry.createdpropername = newEntry.createdpropername.replace(
                        "*", u"※")  # U+203B               REFERENCE MARK
                    newEntry.createdpropername = newEntry.createdpropername.replace(
                        "?", u"؟")  # U+061F               ARABIC QUESTION MARK
                    newEntry.createdpropername = newEntry.createdpropername.replace(
                        '"', u"ʺ"
                    )  # U+02BA               MODIFIER LETTER DOUBLE PRIME
                    newEntry.createdpropername = newEntry.createdpropername.replace(
                        "<", u"˂"
                    )  # U+02C2               MODIFIER LETTER LEFT ARROWHEAD
                    newEntry.createdpropername = newEntry.createdpropername.replace(
                        ">", u"˃"
                    )  # U+02C3               MODIFIER LETTER RIGHT ARROWHEAD
                    newEntry.createdpropername = newEntry.createdpropername.replace(
                        "|",
                        u"ǀ")  # U+01C0               LATIN LETTER DENTAL CLICK
                    #####--windows filename banned chars replacement with unicode--#########

                    ######----------HashGrabs-as-Filenames--------########
                    # File output. Move all files named as hashes to a new dir as the proper name
                    if not os.path.exists(hashtofilenamefolder +
                                          newEntry.createdpropername):
                        shutil.copy(
                            hashidfilename,
                            hashtofilenamefolder + newEntry.createdpropername)

                    currentfilenumber += 1
                    #####------------make propernames.txt (has the hash in it also) ---------########
                    # Add it to the container (since this is in a loop)
                    writelistcontainer.append(newEntry.createdpropername +
                                              " / " + tor.torrent.infoHash +
                                              "\n")
    ##File Output. The Master List file of everything.##
    # when the loop exits, Sort it, and write it to the file.
    writelistcontainer.sort()
    for eachline in writelistcontainer:
        writelistfile.write(eachline)
    writelistfile.close()

Example #14

Show file

File: 4write-uT-resumedat.py Project: genbtc/whatCD-API-utorrent-renamer

def main():
    ss = Preferences()

    newfile = open(os.path.join(ss.get("maindir"), "NEWDAT.dat"), 'wb')
    namesandhashfile = open(
        ss.getwpath("outpath3"), 'r',
        encoding='utf-8').readlines()  #("3propernames.txt")

    beforeafterpath = ss.getwpath(
        "outpath4"
    )  #this holds the intermediate changes to happen before actually renaming so you have a chance to edit/change it. (4beforepath-afterpath.txt)

    #torrentlist = decoder.decode_from_file(ss.get("utresumedat"))  #works   10.645s 12315181 function calls
    #torrentlist = bencode2en.decode_from_file(ss.get("utresumedat")) #works 8.462s 13745202 function calls
    torrentlist = bencode.decode_from_file(
        ss.get("utresumedat"))  #works  8.057ss 10908143 function calls

    #These two things interfere with the processing on the next line
    fileguarduseless = torrentlist.pop(b".fileguard", None)
    rec = torrentlist.pop(b"rec", None)  #Remove this.
    #(dict. comprehension expects only dicts as the root keys)
    #create a reverse lookup dict with "Dict comprehension". nice and simple eh? ;-)
    reverselookup = {
        base64.b16encode(value[b"info"]):
        [key, value[b"caption"], value[b"path"]]
        for key, value in torrentlist.items()
    }

    listofbeforeafter = []
    #to modify paths in reverse lookup dict, start by getting the names and hash out of the namesandhashfile
    for eachline in namesandhashfile:
        nameandhash = eachline.strip().split(
            ' / '
        )  #strip out the \n with strip() and split on the " / " i put there as a seperator.
        theNewname = nameandhash[0]
        thehash = nameandhash[1]
        #searches the dict's keys for a Hash, if exists. and if so, can be used as the [indexid]
        if bytes(thehash, 'utf-8') in reverselookup:
            key = reverselookup[bytes(thehash, 'utf-8')][0]
            theOldPath = torrentlist[key][b"path"].decode('utf-8')
            theNewPath = os.path.join(os.path.dirname(theOldPath), theNewname)
            if theOldPath != theNewPath:
                listofbeforeafter.append(
                    [theOldPath, theNewPath, thehash]
                )  # make a list of a list (stringtoOutputtoFile=[0], hash=[1])

    #sort, then write file detailing changes to path (before / after)
    listofbeforeafter.sort()
    beforeafterfile = open(beforeafterpath, 'w', encoding='utf-8')
    for eachline in listofbeforeafter:
        beforeafterfile.write(
            eachline[0] + " / " + eachline[2] + "\n"
        )  #write oldpath + hash on 1st line    /The hash is duplicated for error checking in case the user accidentally bungles a character while editing...
        beforeafterfile.write(eachline[1] + " / " + eachline[2] +
                              "\n")  #write newpath + hash on 2nd line   /
    beforeafterfile.close()

    #At this point the script pauses, and asks the user to confirm changes shown in the beforepath-afterpath.txt file
    input("Press Enter to begin Renaming files.......\\> "
          )  #wait for the user to press Enter before continuing with anything.

    #WRITE TORRENT RESUME.DAT
    beforeafterfile = open(beforeafterpath, 'r', encoding='utf-8').readlines()
    for i in range(0, len(beforeafterfile), 2):
        beforeandhash = beforeafterfile[i].strip().split(' / ')
        afterandhash = beforeafterfile[i + 1].strip().split(' / ')
        before = beforeandhash[0]
        beforehash = beforeandhash[1]
        after = afterandhash[0]
        afterhash = afterandhash[1]
        if beforehash == afterhash:
            thehash = beforehash
        else:
            print(
                "Error. You have inadvertently modified one of the hash files, and there is a hash mismatch between before/after entries."
            )
            print(
                "Cannot continue. Exiting. Please save your changes into a new file, locate your error, and re-run and fix it..."
            )
            print(
                "Another possibility is you were missing a / (with 1 character of whitespace on each side surrounding it) as a seperator."
            )
        #searches the dict's keys for a Hash, if exists. and if so, can be used as the [indexid]
        if bytes(thehash, 'utf-8') in reverselookup:
            key = reverselookup[bytes(thehash, 'utf-8')][0]
            torrentlist[key][b"caption"] = bytes(after[after.rfind("\\") + 1:],
                                                 'utf-8')
            try:
                # prints a number to console to show progress. corresponds to the numbers in the file (every-two-lines).  (tip:) to show incremental numbers use (((i+1)/2)+1)
                # filenames printed to console, will be missing any unicode chars because the windows console is not unicode compatible!!!! (annoying)
                print(i, before.encode('ascii', errors='ignore').decode())
                print(i + 1, after.encode('ascii', errors='ignore').decode())
                os.rename(before, after)
            except Exception as e:
                traceback.print_exc(
                )  #will output any errors to console but keep going
            torrentlist[key][b"path"] = bytes(after, 'utf-8')
            if after.endswith(".mp3") or after.endswith(
                    ".flac"
            ):  #.mp3 .flac = I personally didnt have any "Single file" .ogg, .aac, etc that needed special handling in this manner
                if b"targets" in torrentlist[
                        key]:  #these lines are a quick fix, for an oversight in the uTorrent process. changing path is not enough
                    torrentlist[key][b"targets"][0][1] = torrentlist[key][
                        b"caption"]  #single-file-mode torrents have a "targets" list that controls the filename

        torrentlist[
            b"rec"] = rec  #add the thing we removed back in so we dont break anything (not sure what this is)
        #fileguard does not need to go back, in fact, purposefully needs to stay out.
    #newfile.write(encode.encode(torrentlist))       #works    10.295s 15361310 function calls
    #newfile.write(bencode2en.bencode2(torrentlist)) #v.slow  31.872s 12452142 function calls
    #newfile.write(bencode2en.bencode4(torrentlist))  #works   7.864s 10906619 function calls
    newfile.write(
        bencode.bencode(torrentlist))  #works    7.699s 10906619 function calls
    newfile.close()
    print(
        "\nPlease note that the filenames shown are missing any unicode characters due to Windows Command Prompt limitations."
    )
    print("Finished writing: ", newfile.name)

Example #15

Show file

File: 4write-uT-resumedat.py Project: genbtc/whatCD-API-utorrent-renamer

def main():
    ss = Preferences()

    newfile = open(os.path.join(ss.get("maindir"),"NEWDAT.dat"),'wb')
    namesandhashfile = open(ss.getwpath("outpath3"),'r',encoding='utf-8').readlines()       #("3propernames.txt")

    beforeafterpath = ss.getwpath("outpath4")   #this holds the intermediate changes to happen before actually renaming so you have a chance to edit/change it. (4beforepath-afterpath.txt)

    #torrentlist = decoder.decode_from_file(ss.get("utresumedat"))  #works   10.645s 12315181 function calls
    #torrentlist = bencode2en.decode_from_file(ss.get("utresumedat")) #works 8.462s 13745202 function calls
    torrentlist = bencode.decode_from_file(ss.get("utresumedat"))  #works  8.057ss 10908143 function calls

    #These two things interfere with the processing on the next line 
    fileguarduseless = torrentlist.pop(b".fileguard",None)
    rec = torrentlist.pop(b"rec",None)   #Remove this. 
    #(dict. comprehension expects only dicts as the root keys)
    #create a reverse lookup dict with "Dict comprehension". nice and simple eh? ;-)
    reverselookup={base64.b16encode(value[b"info"]):[key,value[b"caption"],value[b"path"]] for key,value in torrentlist.items()}

    listofbeforeafter = []
    #to modify paths in reverse lookup dict, start by getting the names and hash out of the namesandhashfile   
    for eachline in namesandhashfile:
        nameandhash = eachline.strip().split(' / ')   #strip out the \n with strip() and split on the " / " i put there as a seperator.
        theNewname = nameandhash[0]
        thehash = nameandhash[1]
        #searches the dict's keys for a Hash, if exists. and if so, can be used as the [indexid]
        if bytes(thehash,'utf-8') in reverselookup:
            key = reverselookup[bytes(thehash,'utf-8')][0]
            theOldPath = torrentlist[key][b"path"].decode('utf-8')
            theNewPath = os.path.join(os.path.dirname(theOldPath),theNewname)
            if theOldPath != theNewPath:
                listofbeforeafter.append([theOldPath,theNewPath,thehash])   # make a list of a list (stringtoOutputtoFile=[0], hash=[1])            

    #sort, then write file detailing changes to path (before / after)
    listofbeforeafter.sort()
    beforeafterfile = open(beforeafterpath,'w',encoding='utf-8')
    for eachline in listofbeforeafter:
        beforeafterfile.write(eachline[0] + " / " + eachline[2] + "\n")         #write oldpath + hash on 1st line    /The hash is duplicated for error checking in case the user accidentally bungles a character while editing...
        beforeafterfile.write(eachline[1] + " / " + eachline[2] + "\n")         #write newpath + hash on 2nd line   /
    beforeafterfile.close()

    #At this point the script pauses, and asks the user to confirm changes shown in the beforepath-afterpath.txt file
    input("Press Enter to begin Renaming files.......\\> ")  #wait for the user to press Enter before continuing with anything.

    #WRITE TORRENT RESUME.DAT
    beforeafterfile = open(beforeafterpath,'r',encoding='utf-8').readlines()
    for i in range(0, len(beforeafterfile), 2):
        beforeandhash = beforeafterfile[i].strip().split(' / ')
        afterandhash = beforeafterfile[i+1].strip().split(' / ')
        before = beforeandhash[0]
        beforehash = beforeandhash[1]
        after = afterandhash[0]
        afterhash = afterandhash[1]
        if beforehash == afterhash:
            thehash = beforehash
        else:
            print("Error. You have inadvertently modified one of the hash files, and there is a hash mismatch between before/after entries.")
            print("Cannot continue. Exiting. Please save your changes into a new file, locate your error, and re-run and fix it...")
            print("Another possibility is you were missing a / (with 1 character of whitespace on each side surrounding it) as a seperator.")
        #searches the dict's keys for a Hash, if exists. and if so, can be used as the [indexid]
        if bytes(thehash,'utf-8') in reverselookup:
            key = reverselookup[bytes(thehash,'utf-8')][0]
            torrentlist[key][b"caption"] = bytes(after[after.rfind("\\")+1:],'utf-8')
            try:
               # prints a number to console to show progress. corresponds to the numbers in the file (every-two-lines).  (tip:) to show incremental numbers use (((i+1)/2)+1) 
               # filenames printed to console, will be missing any unicode chars because the windows console is not unicode compatible!!!! (annoying)
                print(i,before.encode('ascii', errors='ignore').decode())
                print(i+1,after.encode('ascii', errors='ignore').decode())
                os.rename(before, after)
            except Exception as e:
                traceback.print_exc()       #will output any errors to console but keep going
            torrentlist[key][b"path"] = bytes(after,'utf-8')
            if after.endswith(".mp3") or after.endswith(".flac"):     #.mp3 .flac = I personally didnt have any "Single file" .ogg, .aac, etc that needed special handling in this manner
                if b"targets" in torrentlist[key]:                     #these lines are a quick fix, for an oversight in the uTorrent process. changing path is not enough
                    torrentlist[key][b"targets"][0][1] = torrentlist[key][b"caption"]           #single-file-mode torrents have a "targets" list that controls the filename

        torrentlist[b"rec"]=rec   #add the thing we removed back in so we dont break anything (not sure what this is)
                                #fileguard does not need to go back, in fact, purposefully needs to stay out.
    #newfile.write(encode.encode(torrentlist))       #works    10.295s 15361310 function calls
    #newfile.write(bencode2en.bencode2(torrentlist)) #v.slow  31.872s 12452142 function calls
    #newfile.write(bencode2en.bencode4(torrentlist))  #works   7.864s 10906619 function calls
    newfile.write(bencode.bencode(torrentlist))     #works    7.699s 10906619 function calls
    newfile.close()
    print("\nPlease note that the filenames shown are missing any unicode characters due to Windows Command Prompt limitations.")
    print("Finished writing: ", newfile.name)

Example #16

Show file

File: 3create-proper-names.py Project: genbtc/whatCD-API-utorrent-renamer

def main():

    global fmttdcatalogueNumber     #to fix an issue with scope (line 330,333).

    ss = Preferences()
    hashtorrlistfile = ss.getwpath("outpath1")
    directory_path = ss.getwpath("script2destdir")   #as source dir (hash-grabs)
    allfiles = [os.path.join(directory_path, filename) for filename in next(os.walk(directory_path))[2]]  # gives absolute paths + names

    hashtofilenamefolder = ss.getwpath("script3destdir") #as dest dir (hash-grabs-as-filenames)
    writelistfile = codecs.open(ss.getwpath("outpath3"), 'wb', "utf-8")  # write-out a text file with one entry per line. main output file (3propernames.txt)

    writelistcontainer = []
    currentfilenumber = 1

    for hashidfilename in allfiles:  # iterate through filenames of what.cd JSON data

        with open(hashidfilename, 'r') as stringfile:  # open them
            needFixLabeltoNewEdition = False

            jsonresponse = json.load(stringfile)
            tor = Torrent(jsonresponse)

            if tor.group.categoryName != "Music":
                continue  # do not continue altering any non-music torrents.

            releaseTypeName = ReleaseType(tor.group.releaseType).name  # turn int. value into a string using the enum class above
            fmttdreleaseTypeName = "[" + releaseTypeName + "]"

            if tor.torrent.remastered:
                if tor.torrent.remasterTitle:
                    tor.group.name += " (" + tor.torrent.remasterTitle + ")"
                if tor.torrent.remasterYear > tor.group.year:
                    tor.group.year = tor.torrent.remasterYear
                if tor.torrent.remasterRecordLabel:
                    if tor.group.recordLabel.lower() != tor.torrent.remasterRecordLabel.lower():  # so not case sensitive
                        if not tor.group.recordLabel:
                            tor.group.recordLabel = tor.torrent.remasterRecordLabel
                        else:
                            # then things get complicated and we need to figure out which Label/catalog field is the best one to use, or combine them or both
                            needFixLabeltoNewEdition = True

                # if its been determined that its a remaster (new edition), process new label and catalog
                #  checking whether to combine with old, or which one to use, etc, etc.....
                if needFixLabeltoNewEdition == True:

                    score = difflib.SequenceMatcher(None, tor.group.recordLabel.lower(), tor.torrent.remasterRecordLabel.lower()).ratio()
                    if (score < 0.5):
                        # considered similar at 0.6 but this way is not that accurate. if they are lower than 0.5 similar, just use the new one
                        tor.group.recordLabel = tor.torrent.remasterRecordLabel
                        if tor.torrent.remasterCatalogueNumber:
                            tor.group.catalogueNumber = tor.torrent.remasterCatalogueNumber  # if tor.torrent.remasterCatalogueNumber is not blank, use it as the new tor.group.catalogueNumber

                    elif all(word in tor.torrent.remasterRecordLabel.lower() for word in tor.group.recordLabel.lower()) \
                            or \
                            all(word in tor.group.recordLabel.lower() for word in tor.torrent.remasterRecordLabel.lower()):
                        # If all the words in the old label is encompassed in the new one, use the new one.
                        # This would mean the new edition record label is most likely longer and is similar enough to use that,
                        # and preferred, since its more applicable to this specific release anyway.
                        # Even if the reverse is true, this code-block should only catch labels that differ in slight ways.(?)

                        # example 1: originallabel={Big Beat Records}  remasterlabel={Big Beat}          #elif new label in old label
                        #   result:     {Big Beat}                                                      #or
                        # example 2: originallabel={Big Beat}  remasterlabel={Big Beat Records}          #if old label in new label
                        #   result:     {Big Beat Records}                                              #then
                        tor.group.recordLabel = tor.torrent.remasterRecordLabel  # always choose new label.
                        if tor.torrent.remasterCatalogueNumber:
                            tor.group.catalogueNumber = tor.torrent.remasterCatalogueNumber  # if tor.torrent.remasterCatalogueNumber is not blank, use it as the new tor.group.catalogueNumber
                    else:
                        # This else-block is used when the above are not true, and we can't decide on which one to use, so we use both. Combined.

                        # example 1: originallabel={Island Records}  remasterlabel={Island Records / Lokal Legend}
                        #   result:     {Island Records / Lokal Legend}
                        # example 2: originallabel {Wall Recordings}  remasterlabel={Tiger Records}
                        #   result:     {Wall Recordings / Tiger Records}  
                        new = ""
                        splitorig = re.sub("[(,),-]", " ",
                                           tor.group.recordLabel).split()  # remove delimiter chars that mess up stuff
                        sepchar = ["/"]
                        splitnew = re.sub("[(,),-]", " ", tor.torrent.remasterRecordLabel).split()  # turn everything into a list

                        new = " ".join(["%s" % (v) for v in getUniqueWords(
                            splitorig + sepchar + splitnew)])  # append unique words to the orig.

                        if tor.torrent.remasterCatalogueNumber:
                            if tor.group.catalogueNumber != tor.torrent.remasterCatalogueNumber:
                                if tor.group.catalogueNumber:
                                    tor.group.catalogueNumber += " / " + tor.torrent.remasterCatalogueNumber
                                else:
                                    tor.group.catalogueNumber = tor.torrent.remasterCatalogueNumber
                        tor.group.recordLabel = new

            # ntpath.basename was really slow so doing it manually.... (32 times faster)= 0.128 seconds vs 0.004 seconds
            # whats happening here is due to an exponential nested for loop, ie: 4129 results^2 = 17 million function calls of either basename or .rfind('\\')
            # there should be a better way to do this.                        
            hashfilesepidloc = hashidfilename.rfind("\\") + 1
            cmphashfn = hashidfilename[hashfilesepidloc:]
            iterhashfile = open(hashtorrlistfile, 'r',encoding='utf-8').readlines()  # read everything into memory
            for i in iterhashfile:  # read line
                splitline = i.strip().split(' / ')      # 0 torrentID / 1 Hash / 2 torrentfilename
                if splitline[1] == cmphashfn:  # if it matches, start processing
                    newEntry = TorrentEntry()  # instanciate class
                    newEntry.hash = splitline[1]  # store Hash for reference
                    newEntry.pathname = splitline[2]  # filename + extension
                    locextension = newEntry.pathname.find(".torrent")  # location of extension
                    locid = newEntry.pathname.rfind("-") + 1  # location of tor.torrent.id
                    newEntry.filename = newEntry.pathname[:locextension]  # chop the extension off (manually)
                    newEntry.artistalbum = newEntry.filename[:locid - 1]  # JUST the name (no ID#)
                    newEntry.torrentid = newEntry.filename[locid:locextension]  # grab ID for future reference (tor.torrent.id on what.cd)
                    # example : S-Type - Billboard (Lido Remix) - 2014 (WEB - MP3 - 320)
                    newEntry.artist = newEntry.artistalbum[:newEntry.artistalbum.find(" - ")]  # grab artist
                    tempalbum = newEntry.artistalbum[newEntry.artistalbum.find(" - ") + 3:]  # temp value helps with string processing
                    newEntry.album = tempalbum[:tempalbum.find(" - ")]  # not needed since it can be pulled from [group]
                    newEntry.year = tempalbum[tempalbum.find(" - ") + 3:tempalbum.find(" - ") + 7]  # not needed since it can be pulled from [group]

                    # ------------Recreate name------------#
                    # -------Special RULES SECTION---------#
                    newEntry.createdpropername = newEntry.artist + " - " + tor.group.name + " "
                    if tor.group.releaseType > 1:  # dont put it for Album or Unspecified
                        if tor.group.releaseType != 5:  # do something different for EP
                            newEntry.createdpropername += fmttdreleaseTypeName + " "
                        else:  # make a rule so [EP] doesnt come up if there is " EP " already
                            if tor.group.name[-2:] != "EP":
                                newEntry.createdpropername += fmttdreleaseTypeName + " "

                    newEntry.createdpropername += "(" + str(tor.group.year) + ")"

                    # written like this for easy humanreading
                    #           format = MP3, FLAC, AAC,
                    #          media = cd, web, vinyl, soundboard, dat
                    #        encoding = lossless,320,v0,256,v2,192
                    if tor.torrent.format == "FLAC":
                        newEntry.fmttdMediaEncodeFormat = "FLAC"
                        # log and logscore only applicable to flac.
                        if tor.torrent.hasLog:
                            newEntry.fmttdMediaEncodeFormat += " " + str(
                                tor.torrent.logScore) + "%"  # the % implies "log" so leave out the word log
                    if tor.torrent.format == "AAC":
                        if (any("itunes" in word.lower() for word in tor.torrent.description.split())) or (
                        any("itunes" in word.lower() for word in tor.torrent.filePath.split())):
                            newEntry.fmttdMediaEncodeFormat = "iTunes "
                        newEntry.fmttdMediaEncodeFormat += "AAC"
                    if tor.torrent.format == "MP3":
                        # dont actually write mp3
                        # only write Scene or WEB if it is an mp3
                        if tor.torrent.scene:
                            newEntry.fmttdMediaEncodeFormat += "Scene"
                        elif tor.torrent.media == "WEB":
                            newEntry.fmttdMediaEncodeFormat += "WEB"
                        else:
                            newEntry.fmttdMediaEncodeFormat += tor.torrent.media
                        if "VBR" in tor.torrent.encoding:
                            newEntry.fmttdMediaEncodeFormat += " " + tor.torrent.encoding[:2]
                        else:
                            newEntry.fmttdMediaEncodeFormat += " " + tor.torrent.encoding

                    newEntry.fmttdMediaEncodeFormat = "[" + newEntry.fmttdMediaEncodeFormat + "]"

                    newEntry.createdpropername += " " + newEntry.fmttdMediaEncodeFormat

                    # put catalog number in brackets
                    if tor.group.catalogueNumber:
                        fmttdcatalogueNumber = ("[" + tor.group.catalogueNumber + "]").replace(" ", "").upper()
                    elif tor.group.recordLabel:
                        fmttdcatalogueNumber = "[" + tor.group.recordLabel + "]"  # combines with next part to put recordLabel in the front if Cat# missing

                    if any(word in tor.group.recordLabel.lower() for word in Sorted_Record_Labels_List):  # so not case sensitive
                        newEntry.createdpropername = fmttdcatalogueNumber + " " + newEntry.createdpropername
                    elif tor.group.recordLabel:
                        newEntry.createdpropername += " " + "{" + tor.group.recordLabel + "}"
                        # if tor.group.catalogueNumber:                                                       #This will put [CATA###] after all releases, even labels not in your list
                        #     newEntry.createdpropername += " " + fmttdcatalogueNumber              #Gets kind of cumbersome for me. (uncomment to use it anyway)

                    # these 2 lines are a quick fix, for an oversight in my naming process
                    # if these are single file .mp3's (or a single .flac) they will need a .mp3 at the end of the filename
                    if not tor.torrent.filePath:
                        newEntry.createdpropername += "." + tor.torrent.format.lower()

                    try:
                        print(currentfilenumber, newEntry.createdpropername.encode('ascii', errors='ignore').decode())
                    except:
                        print("COULD NOT PRINT UNICODE FILENAME TO CONSOLE. HASH=", tor.torrent.infoHash)

                    ########-------------replace characters section----------------#########
                    newEntry.createdpropername = newEntry.createdpropername.replace("\\","＼")  # U+FF3C               FULLWIDTH REVERSE SOLIDUS
                    # these forward slashes are strange. "FullWidth" is very wide and would be too wide if theres already spaces around it.
                    newEntry.createdpropername = newEntry.createdpropername.replace(" / ","／")  # U+FFOF  (wide)       FULLWIDTH SOLIDUS
                    # "Division" slash is too narrow and needs spaces inserted surrounding it (and is still less width than the fullwidth)
                    newEntry.createdpropername = newEntry.createdpropername.replace("/"," ∕ ")  # U+2215  (narrow)     DIVISION SLASH
                    newEntry.createdpropername = newEntry.createdpropername.replace(":","꞉")  # U+A789               MODIFIER LETTER COLON
                    newEntry.createdpropername = newEntry.createdpropername.replace("*","※")  # U+203B               REFERENCE MARK
                    newEntry.createdpropername = newEntry.createdpropername.replace("?","؟")  # U+061F               ARABIC QUESTION MARK
                    newEntry.createdpropername = newEntry.createdpropername.replace('"',"ʺ")  # U+02BA               MODIFIER LETTER DOUBLE PRIME
                    newEntry.createdpropername = newEntry.createdpropername.replace("<","˂")  # U+02C2               MODIFIER LETTER LEFT ARROWHEAD
                    newEntry.createdpropername = newEntry.createdpropername.replace(">","˃")  # U+02C3               MODIFIER LETTER RIGHT ARROWHEAD
                    newEntry.createdpropername = newEntry.createdpropername.replace("|","ǀ")  # U+01C0               LATIN LETTER DENTAL CLICK
                    #####--windows filename banned chars replacement with unicode--#########

                    ######----------HashGrabs-as-Filenames--------########
                    # File output. Move all files named as hashes to a new dir as the proper name
                    if not os.path.exists(hashtofilenamefolder + newEntry.createdpropername):
                        shutil.copy(hashidfilename, hashtofilenamefolder + newEntry.createdpropername)

                    currentfilenumber += 1
                    #####------------make propernames.txt (has the hash in it also) ---------########
                    # Add it to the container (since this is in a loop)
                    writelistcontainer.append(newEntry.createdpropername + " / " + tor.torrent.infoHash + "\n")
    ##File Output. The Master List file of everything.##                    
    # when the loop exits, Sort it, and write it to the file.
    writelistcontainer.sort()
    for eachline in writelistcontainer:
        writelistfile.write(eachline)
    writelistfile.close()

Example #17

Show file

File: alt123pre4-RenameOnlyFromFilename.py Project: genbtc/whatCD-API-utorrent-renamer

def main():
    ss = Preferences()
    script1sourcedir = ss.getwpath(u"script1sourcedir")+u''            #("seeding\"), needs unicode u for file opening.
    files = [os.path.join(script1sourcedir,filename) for filename in next(os.walk(script1sourcedir))[2]]        #gives absolute paths + names

    currentfile = 0

    container = []    #set up an empty container for desired data to get put into for later
    for eachfile in files:

        metainfo = bencode.decode_from_file(eachfile)
        # #need to manually SHA1 hash the torrent file's info-dict to get the info-hash
        infodict = metainfo['info']
        info_hash = hashlib.sha1(bencode.bencode(infodict)).hexdigest().upper()

        internalname = infodict['name']
        torrentfilename = eachfile[eachfile.rfind("\\")+1:]
        locextension = torrentfilename.find(".torrent")           #location of extension (char position)
        locid = torrentfilename.rfind("-")+1                      #location of torrentID (char position)
        torrentid = torrentfilename[locid:locextension]           #grab torrentID 
        
        torrentfilename = torrentfilename[:locid-1]

        #####-------------replace banned characters with unicode section-----------------######
        ###
        # Forward slashes are strange. "FullWidth" is very wide and would be too wide if theres already spaces around it.
        torrentfilename = torrentfilename.replace(" / ",u"／")  # U+FFOF  (wide)       FULLWIDTH SOLIDUS
        # "Division" slash is too narrow and needs spaces inserted surrounding it (and is still less width than the fullwidth)
        torrentfilename = torrentfilename.replace("/",u" ∕ ")  # U+2215  (narrow)     DIVISION SLASH
        # Backslash (requires two slashes in python)
        torrentfilename = torrentfilename.replace("\\",u"＼")  # U+FF3C               FULLWIDTH REVERSE SOLIDUS
        # Colon
        torrentfilename = torrentfilename.replace(":",u"꞉")  # U+A789               MODIFIER LETTER COLON
        # asterisk
        torrentfilename = torrentfilename.replace("*",u"※")  # U+203B               REFERENCE MARK
        # question mark (replacement is backwards, sorry)
        torrentfilename = torrentfilename.replace("?",u"؟")  # U+061F               ARABIC QUESTION MARK
        # Double-quote
        torrentfilename = torrentfilename.replace('"',u"ʺ")  # U+02BA               MODIFIER LETTER DOUBLE PRIME
        # Left angle bracket
        torrentfilename = torrentfilename.replace("<",u"˂")  # U+02C2               MODIFIER LETTER LEFT ARROWHEAD
        # right angle bracket
        torrentfilename = torrentfilename.replace(">",u"˃")  # U+02C3               MODIFIER LETTER RIGHT ARROWHEAD
        # Pipe
        torrentfilename = torrentfilename.replace("|",u"ǀ")  # U+01C0               LATIN LETTER DENTAL CLICK
        ###
        #####----------windows filename banned chars replacement with unicode-----------######

        container.append([torrentfilename, internalname, info_hash, torrentid])
        currentfile += 1 
        print currentfile, torrentfilename.encode('ascii', errors='ignore')

    print "\nReminder: Console output is ascii only, Cannot Print Unicode. (chars omitted)"
    ##File Output. The Master List file of everything.##                    
    # when the loop exits, Sort it, and write it to the file.
    container.sort()
    writelistfile = codecs.open(ss.getwpath("outpath3"), 'wb', "utf-8")  # write-out a text file with one entry per line. main output file (3propernames.txt)
    for eachline in container:
        writelistfile.write(eachline[0] + " / " + eachline[2] + "\n")   #torrentname  / infohash
    writelistfile.close()
    print "Completed. Unicode File Written to: ", os.path.basename(ss.getwpath("outpath3"))

Example #18

Show file

File: reducer.py Project: haukurb/ReynirPackage

    def _calc_terminal_scores(self, w):
        """ Calculate the score for each possible terminal/token match """

        # First pass: for each token, find the possible terminals that
        # can correspond to that token
        finals = defaultdict(set)
        tokens = dict()
        self._find_options(w, finals, tokens)

        # Second pass: find a (partial) ordering by scoring the terminal alternatives for each token
        scores = dict()
        noun_prefs = NounPreferences.DICT

        # Loop through the indices of the tokens spanned by this tree
        for i in range(w.start, w.end):

            s = finals[i]
            # Initially, each alternative has a score of 0
            scores[i] = {terminal: 0 for terminal in s}

            if len(s) <= 1:
                # No ambiguity to resolve here
                continue

            # More than one terminal in the option set for the token at index i
            # Calculate the relative scores
            # Find out whether the first part of all the terminals are the same
            same_first = len(set(terminal.first for terminal in s)) == 1
            txt = tokens[i].lower
            # Get the last part of a composite word (e.g. 'jaðar-áhrifin' -> 'áhrifin')
            txt_last = txt.rsplit('-', maxsplit=1)[-1]
            # No need to check preferences if the first parts of all possible terminals are equal
            # Look up the preference ordering from Reynir.conf, if any
            prefs = None if same_first else Preferences.get(txt_last)
            sc = scores[i]
            if prefs:
                adj_worse = defaultdict(int)
                adj_better = defaultdict(int)
                for worse, better, factor in prefs:
                    for wt in s:
                        if wt.first in worse:
                            for bt in s:
                                if wt is not bt and bt.first in better:
                                    if bt.name[0] in "\"'":
                                        # Literal terminal: be even more aggressive in promoting it
                                        adj_w = -2 * factor
                                        adj_b = +6 * factor
                                    else:
                                        adj_w = -2 * factor
                                        adj_b = +4 * factor
                                    adj_worse[wt] = min(adj_worse[wt], adj_w)
                                    adj_better[bt] = max(adj_better[bt], adj_b)
                for wt, adj in adj_worse.items():
                    sc[wt] += adj
                for bt, adj in adj_better.items():
                    sc[bt] += adj

            # Apply heuristics to each terminal that potentially matches this token
            for t in s:
                tfirst = t.first
                if tfirst == "ao" or tfirst == "eo":
                    # Subtract from the score of all ao and eo
                    sc[t] -= 1
                elif tfirst == "no":
                    if t.is_singular:
                        # Add to singular nouns relative to plural ones
                        sc[t] += 1
                    elif t.is_abbrev:
                        # Punish abbreviations in favor of other more specific terminals
                        sc[t] -= 1
                    if tokens[i].is_upper and tokens[i].is_word and tokens[
                            i].t2:
                        # Punish connection of normal noun terminal to
                        # an uppercase word that can be a person or entity name
                        if any(m.fl in {"ism", "föð", "móð", "örn", "fyr"}
                               for m in tokens[i].t2):
                            # logging.info("Punishing connection of {0} with 'no' terminal".format(tokens[i].t1))
                            sc[t] -= 5
                    # Noun priorities, i.e. between different genders
                    # of the same word form
                    # (for example "ára" which can refer to three stems with different genders)
                    if txt_last in noun_prefs:
                        np = noun_prefs[txt_last].get(t.gender, 0)
                        sc[t] += np
                elif tfirst == "fs":
                    if t.has_variant("nf"):
                        # Reduce the weight of the 'artificial' nominative prepositions
                        # 'næstum', 'sem', 'um'
                        sc[t] -= 8  # Make other cases outweigh the Nl_nf bonus of +4 (-2 -3 = -5)
                    elif txt == "við" and t.has_variant("þgf"):
                        sc[t] += 1  # Smaller bonus for við + þgf (is rarer than við + þf)
                    elif txt == "sem" and t.has_variant("þf"):
                        sc[t] -= 4
                    elif txt == "á" and t.has_variant("þgf"):
                        sc[t] += 4  # Larger bonus for á + þgf to resolve conflict with verb 'eiga'
                    else:
                        # Else, give a bonus for each matched preposition
                        sc[t] += 2
                elif tfirst == "so":
                    if t.num_variants > 0 and t.variant(0) in "012":
                        # Consider verb arguments
                        # Normally, we give a bonus for verb arguments: the more matched, the better
                        numcases = int(t.variant(0))
                        adj = 2 * numcases
                        # !!! Logic should be added here to encourage zero arguments for verbs in 'miðmynd'
                        if numcases == 0:
                            # Zero arguments: we might not like this
                            vo0 = VerbObjects.VERBS[0]
                            if all(
                                (m.stofn not in vo0) and (m.ordmynd not in vo0)
                                    and ("MM" not in m.beyging)
                                    for m in tokens[i].t2 if m.ordfl == "so"):
                                # No meaning where the verb has zero arguments
                                # print("Subtracting 5 points for 0-arg verb {0}".format(tokens[i].t1))
                                adj = -5
                        # Apply score adjustments for verbs with particular object cases,
                        # as specified by $score(n) pragmas in Verbs.conf
                        # In the (rare) cases where there are conflicting scores,
                        # apply the most positive adjustment
                        adjmax = 0
                        for m in tokens[i].t2:
                            if m.ordfl == "so":
                                key = m.stofn + t.verb_cases
                                score = VerbObjects.SCORES.get(key)
                                if score is not None:
                                    adjmax = score
                                    break
                        sc[t] += adj + adjmax
                    if t.is_sagnb:
                        # We like sagnb and lh, it means that more
                        # than one piece clicks into place
                        sc[t] += 6
                    elif t.is_lh:
                        # sagnb is preferred to lh, but vb (veik beyging) is discouraged
                        if t.has_variant("vb"):
                            sc[t] -= 2
                        else:
                            sc[t] += 3
                    elif t.is_lh_nt:
                        sc[t] += 12  # Encourage LHNT rather than LO
                    elif t.is_mm:
                        # Encourage mm forms. The encouragement should be better than
                        # the score for matching a single case, so we pick so_0_mm
                        # rather than so_1_þgf, for instance.
                        sc[t] += 3
                    elif t.is_vh:
                        # Encourage vh forms
                        sc[t] += 2
                    if t.is_subj:
                        # Give a small bonus for subject matches
                        if t.has_variant("none"):
                            # ... but a punishment for subj_none
                            sc[t] -= 3
                        else:
                            sc[t] += 1
                    if t.is_nh:
                        if (i > 0) and any(pt.first == 'nhm'
                                           for pt in finals[i - 1]):
                            # Give a bonus for adjacent nhm + so_nh terminals
                            sc[t] += 4  # Prop up the verb terminal with the nh variant
                            for pt in scores[i - 1].keys():
                                if pt.first == 'nhm':
                                    # Prop up the nhm terminal
                                    scores[i - 1][pt] += 2
                                    break
                        if any(pt.first == "no" and pt.has_variant("ef")
                               and pt.is_plural for pt in s):
                            # If this is a so_nh and an alternative no_ef_ft exists, choose this one
                            # (for example, 'hafa', 'vera', 'gera', 'fara', 'mynda', 'berja', 'borða')
                            sc[t] += 4
                    if (i > 0) and tokens[i].is_upper:
                        # The token is uppercase and not at the start of a sentence:
                        # discourage it from being a verb
                        sc[t] -= 4
                elif tfirst == "tala":
                    if t.has_variant("ef"):
                        # Try to avoid interpreting plain numbers as possessives
                        sc[t] -= 4
                elif tfirst == "person":
                    if t.has_variant("nf"):
                        # Prefer person names in the nominative case
                        sc[t] += 2
                elif tfirst == "sérnafn":
                    if not tokens[i].t2:
                        # If there are no BÍN meanings, we had no choice but to use sérnafn,
                        # so alleviate some of the penalty given by the grammar
                        sc[t] += 4
                    else:
                        # BÍN meanings are available: discourage this
                        # print(f"Discouraging sérnafn {txt}, BÍN meanings are {tokens[i].t2}")
                        sc[t] -= 10
                        if i == w.start:
                            # First token in sentence, and we have BÍN meanings:
                            # further discourage this
                            sc[t] -= 6
                elif tfirst == "fyrirtæki":
                    # We encourage company names to be interpreted as such,
                    # so we give company abbreviations ('hf.', 'Corp.', 'Limited')
                    # a high priority
                    sc[t] += 24
                elif tfirst == "st" or (tfirst == "sem"
                                        and t.colon_cat == "st"):
                    if txt == "sem":
                        # Discourage "sem" as a pure conjunction (samtenging)
                        # (it does not get a penalty when occurring as
                        # a connective conjunction, 'stt')
                        sc[t] -= 6
                elif tfirst == "abfn":
                    # If we have number and gender information with the reflexive
                    # pronoun, that's good: encourage it
                    sc[t] += 6 if t.num_variants > 1 else 2
                elif tfirst == "gr":
                    # Encourage separate definite article rather than pronoun
                    sc[t] += 2
                elif t.name[0] in "\"'":
                    # Give a bonus for exact or semi-exact matches
                    sc[t] += 1

        return scores

Example #19

Show file

File: 4write-uT-resumedat.py Project: genbtc/whatCD-API-utorrent-renamer

def main():
    ss = Preferences()

    oldfile = open(ss.get("utresumedat"), "rb").read()
    newfile = open(os.path.join(ss.get("maindir"), u"NEWDAT.dat"), "wb")
    namesandhashfile = open(ss.getwpath("outpath3"), "rb").readlines()

    beforeafterpath = ss.getwpath(
        "outpath4"
    )  # this holds the intermediate changes to happen before actually renaming so you have a chance to edit/change it. (4beforepath-afterpath.txt)

    torrentlist = bencode.bdecode(oldfile)

    # These two things interfere with the processing on the next line
    fileguarduseless = torrentlist.pop(".fileguard", None)
    rec = torrentlist.pop("rec", None)  # Remove this.
    # (dict. comprehension expects only dicts as the root keys)
    # create a reverse lookup dict with "Dict comprehension". nice and simple eh? ;-)
    reverselookup = {
        base64.b16encode(value["info"]): [key, value["caption"], value["path"]]
        for key, value in torrentlist.iteritems()
    }

    listofbeforeafter = []
    # to modify paths in reverse lookup dict, start by getting the names and hash out of the namesandhashfile
    for eachline in namesandhashfile:
        nameandhash = eachline.strip().split(
            " / "
        )  # strip out the \n with strip() and split on the " / " i put there as a seperator.
        theNewname = nameandhash[0]
        thehash = nameandhash[1]
        # searches the dict's keys for a Hash, if exists. and if so, can be used as the [indexid]
        if thehash in reverselookup:
            key = reverselookup[thehash][0]
            theOldPath = torrentlist[key]["path"]
            theNewPath = os.path.join(os.path.dirname(theOldPath), theNewname)
            if theOldPath != theNewPath:
                listofbeforeafter.append(
                    [theOldPath, theNewPath, thehash]
                )  # make a list of a list (stringtoOutputtoFile=[0], hash=[1])

    # sort, then write file detailing changes to path (before / after)
    listofbeforeafter.sort()
    beforeafterfile = open(beforeafterpath, "wb")
    for eachline in listofbeforeafter:
        try:
            beforeafterfile.write(
                eachline[0] + " / " + eachline[2] + "\n"
            )  # write oldpath + hash on 1st line    /The hash is duplicated for error checking in case the user accidentally bungles a character while editing...
            beforeafterfile.write(eachline[1] + " / " + eachline[2] + "\n")  # write newpath + hash on 2nd line   /
        except:
            print "Error writing the before+after file, probably a encoding/unicode error: \n", eachline[
                0
            ], "\n", eachline[1]
            print "This was a fatal error and program could not continue."
            return
    beforeafterfile.close()

    # At this point the script pauses, and asks the user to confirm changes shown in the beforepath-afterpath.txt file
    raw_input(
        "Press Enter to begin Renaming files.......\\> "
    )  # wait for the user to press Enter before continuing with anything.

    # WRITE TORRENT RESUME.DAT
    beforeafterfile = open(beforeafterpath, "rb").readlines()
    for i in xrange(0, len(beforeafterfile), 2):
        beforeandhash = beforeafterfile[i].strip().split(" / ")
        afterandhash = beforeafterfile[i + 1].strip().split(" / ")
        before = beforeandhash[0].decode("utf-8")
        beforehash = beforeandhash[1]
        after = afterandhash[0].decode("utf-8")
        afterhash = afterandhash[1]
        if beforehash == afterhash:
            thehash = beforehash
        else:
            print "Error. You have inadvertently modified one of the hash files, and there is a hash mismatch between before/after entries."
            print "Cannot continue. Exiting. Please save your changes into a new file, locate your error, and re-run and fix it..."
            print "Another possibility is you were missing a / (with 1 character of whitespace on each side surrounding it) as a seperator."
        # searches the dict's keys for a Hash, if exists. and if so, can be used as the [indexid]
        if thehash in reverselookup:
            key = reverselookup[thehash][0]
            torrentlist[key]["caption"] = after[after.rfind("\\") + 1 :]
            try:
                # prints a number to console to show progress. corresponds to the numbers in the file (every-two-lines).  (tip:) to show incremental numbers use (((i+1)/2)+1)
                # filenames printed to console, will be missing any unicode chars because the windows console is not unicode compatible!!!! (annoying)
                print i, before.encode("ascii", errors="ignore")
                print i + 1, after.encode("ascii", errors="ignore")
                os.rename(before, after)
            except Exception as e:
                traceback.print_exc()  # will output any errors to console but keep going
            torrentlist[key]["path"] = after
            if after.endswith(".mp3") or after.endswith(
                ".flac"
            ):  # .mp3 .flac = I personally didnt have any "Single file" .ogg, .aac, etc that needed special handling in this manner
                if torrentlist[key].has_key(
                    "targets"
                ):  # these lines are a quick fix, for an oversight in the uTorrent process. changing path is not enough
                    torrentlist[key]["targets"][0][1] = after[
                        after.rfind("\\") + 1 :
                    ]  # single-file-mode torrents have a "targets" list that controls the filename

    torrentlist["rec"] = rec  # add the thing we removed back in so we dont break anything (not sure what this is)
    # fileguard does not need to go back, in fact, purposefully needs to stay out.
    newfile.write(bencode.bencode(torrentlist))
    newfile.close()
    print "Finished writing: ", newfile.name