def main():
    """Sort .torrent files into per-tracker subfolders of All-Torrs.

    Decodes each torrent, extracts the announce URL's host name, and moves
    the file into a folder named after that tracker ("Multiple Trackers"
    for announce-lists, "None" when nothing usable is found).
    """
    ss = Preferences()  # settings.1py
    # needs a unicode literal so os.* commands work at all on paths with funny chars
    directory_path = os.path.join(ss.get("maindir"), u"All-Torrs\\")
    # absolute path + name for every file directly inside directory_path
    files = [os.path.join(directory_path, fn)
             for fn in next(os.walk(directory_path))[2]]
    for eachfile in files:
        # Reset per file. Fixes the original bug where `tracker` kept its
        # value from the previous file (or was unbound on the first one)
        # when decoding succeeded but no announce info was present.
        tracker = "None"
        with open(eachfile, "rb") as stringfile:
            try:
                torrent = bencode.decode(stringfile.read())
                for key, value in torrent.iteritems():
                    if key == "announce":
                        # keep only the host part of the announce URL,
                        # dropping any :port suffix
                        domain = "{uri.netloc}".format(uri=urlparse(value))
                        colon = domain.find(":")
                        if colon != -1:
                            domain = domain[:colon]
                        if domain:
                            tracker = domain  # only using 1 value here (lazy)
                    elif key == "announce-list":
                        tracker = "Multiple Trackers"
            except Exception:
                # undecodable torrent: file it under "None"
                tracker = "None"
        torrentfilename = eachfile[eachfile.rfind("\\") + 1:]
        destdir = directory_path + tracker
        if not os.path.exists(destdir):
            os.makedirs(destdir)
        # the source file is closed here (outside the with), so the
        # rename works on Windows
        os.rename(eachfile, os.path.join(destdir + "\\" + torrentfilename))
def main(): ss = Preferences() torrentlist = bencode.decode_from_file(ss.get("utresumedat")) partiallist = [ ] # set up an empty container for desired data to get put into for later fileguarduseless = torrentlist.pop(".fileguard", None) rec = torrentlist.pop("rec", None) #Remove this. #(dict. comprehension expects only dicts as the root keys) #create a reverse lookup dict with "Dict comprehension". nice and simple eh? ;-) reverselookup = { base64.b16encode(value["info"]): [value["path"], value["caption"], origkey] for origkey, value in torrentlist.iteritems() } for thehash, value in reverselookup.iteritems(): partiallist.append([value[0], value[1], thehash]) partiallist.sort() writelistfile = open( os.path.join(ss.get("maindir"), "TorrentList.txt"), 'wb') # write-out a text file with one entry per line. for eachline in partiallist: writelistfile.write(eachline[0] + " / " + eachline[1] + " / " + eachline[2] + "\n") #path / #caption / #infohash writelistfile.close() print "Finished writing: TorrentList.txt"
def main():
    """List every .torrent in the seeding dir as
    "torrentID / infohash / filename" lines (outpath1)."""
    ss = Preferences()
    script1sourcedir = ss.getwpath("script1sourcedir")  # ("seeding\")
    # absolute paths + names of all files directly inside the source dir
    files = [os.path.join(script1sourcedir, filename)
             for filename in next(os.walk(script1sourcedir))[2]]
    container = []  # collects [filename, internal name, infohash, torrentID] per file
    # enumerate replaces the original hand-maintained currentfile counter
    for currentfile, eachfile in enumerate(files, start=1):
        metainfo = decoder.decode_from_file(eachfile)
        # manually SHA1-hash the torrent file's info-dict to get the info-hash
        infodict = metainfo[b'info']
        info_hash = hashlib.sha1(encode.encode(infodict)).hexdigest().upper()
        internalname = infodict[b'name']
        torrentfilename = eachfile[eachfile.rfind("\\") + 1:]
        locextension = torrentfilename.find(".torrent")  # location of extension (char position)
        locid = torrentfilename.rfind("-") + 1           # location of torrentID (char position)
        torrentid = torrentfilename[locid:locextension]  # grab torrentID
        container.append([torrentfilename, internalname, info_hash, torrentid])
        # console output is ascii only, cannot print unicode - chars are omitted
        print(currentfile, torrentfilename.encode('ascii', errors='ignore').decode())
    # WRITE FILE 1: torrentID / Hash / torrentfilename, one entry per line
    # ("1seeding_ID+Hash+Filename.txt")
    with codecs.open(ss.getwpath("outpath1"), 'wb', "utf-8") as writelistfile:
        for eachline in container:
            writelistfile.write(eachline[3] + " / " + eachline[2] + " / " + eachline[0] + "\n")
def main():
    """Write TorrentList.txt with one "path / caption / infohash" line per
    torrent found in uTorrent's resume.dat (Python 3 variant)."""
    ss = Preferences()
    torrentlist = bencode.decode_from_file(ss.get("utresumedat"))
    # resume.dat bookkeeping entries that are not per-torrent dicts; drop them
    # so the dict comprehension below only sees dicts as the root values
    torrentlist.pop(b".fileguard", None)
    torrentlist.pop(b"rec", None)
    # reverse lookup: upper-hex infohash -> [path, caption, original key]
    reverselookup = {
        base64.b16encode(value[b"info"]): [value[b"path"], value[b"caption"], origkey]
        for origkey, value in torrentlist.items()
    }
    # decode the bytes fields once, at the boundary, before sorting/writing
    partiallist = [
        [value[0].decode('utf-8'), value[1].decode('utf-8'), thehash.decode('utf-8')]
        for thehash, value in reverselookup.items()
    ]
    partiallist.sort()
    # write-out a text file with one entry per line; the with-block
    # guarantees the handle is closed even on error
    with open(os.path.join(ss.get("maindir"), "TorrentList.txt"), 'w', encoding='utf-8') as writelistfile:
        for eachline in partiallist:
            writelistfile.write(eachline[0] + " / " + eachline[1] + " / " + eachline[2] + "\n")  # path / caption / infohash
def main():
    """Write the master "filename / infohash" list (3propernames.txt) from
    the per-hash JSON grab files in script3destdir."""
    ss = Preferences()
    directory_path = ss.getwpath("script3destdir")  # ("hash-grabs-as-filenames" dir)
    # absolute paths + names of all files directly inside the dir
    allfiles = [os.path.join(directory_path, fn)
                for fn in next(os.walk(directory_path))[2]]
    # ("3propernames.txt" file); with-block guarantees close even on error
    with codecs.open(ss.getwpath("outpath3"), 'wb', "utf-8") as writelistfile:
        for hashidfilename in allfiles:  # iterate through filenames of what.cd JSON data
            with open(hashidfilename, 'r') as stringfile:
                response = json.load(stringfile)
            torrentHash = response["torrent"]["infoHash"]  # grab the hash to compare
            # File Output. The Master List file of the names and hashes.
            writelistfile.write(hashidfilename[hashidfilename.rfind("\\") + 1:] + " / " + torrentHash + "\n")
def main(): ss = Preferences() torrentlist = bencode.decode_from_file(ss.get("utresumedat")) partiallist = [] # set up an empty container for desired data to get put into for later fileguarduseless = torrentlist.pop(".fileguard",None) rec = torrentlist.pop("rec",None) #Remove this. #(dict. comprehension expects only dicts as the root keys) #create a reverse lookup dict with "Dict comprehension". nice and simple eh? ;-) reverselookup={base64.b16encode(value["info"]):[value["path"],value["caption"],origkey] for origkey,value in torrentlist.iteritems()} for thehash,value in reverselookup.iteritems(): partiallist.append([value[0],value[1],thehash]) partiallist.sort() writelistfile = open(os.path.join(ss.get("maindir"),"TorrentList.txt"),'wb') # write-out a text file with one entry per line. for eachline in partiallist: writelistfile.write(eachline[0] + " / " + eachline[1] + " / " + eachline[2] + "\n") #path / #caption / #infohash writelistfile.close() print "Finished writing: TorrentList.txt"
def main():
    """Move each .torrent in All-Torrs into a subfolder named after its
    tracker host ("Multiple Trackers" for announce-lists, "None" otherwise).
    """
    ss = Preferences()  # settings.1py
    # needs a unicode literal so os.* commands work at all on paths with funny chars
    directory_path = os.path.join(ss.get("maindir"), u"All-Torrs\\")
    # absolute paths + names of the files directly inside directory_path
    files = [os.path.join(directory_path, fn)
             for fn in next(os.walk(directory_path))[2]]
    for eachfile in files:
        # Default per file: avoids the original bug where `tracker` leaked
        # from the previous iteration (or was unbound) when this torrent
        # decoded fine but carried no usable announce entry.
        tracker = "None"
        with open(eachfile, 'rb') as stringfile:
            try:
                torrent = bencode.decode(stringfile.read())
                for key, value in torrent.iteritems():
                    if key == "announce":
                        # host part of the announce URL, without any :port
                        domain = '{uri.netloc}'.format(uri=urlparse(value))
                        colon = domain.find(':')
                        if colon != -1:
                            domain = domain[:colon]
                        if domain:
                            tracker = domain  # only using 1 value here (lazy)
                    elif key == "announce-list":
                        tracker = "Multiple Trackers"
            except Exception:
                # undecodable torrent: file it under "None"
                tracker = "None"
        torrentfilename = eachfile[eachfile.rfind("\\") + 1:]
        destdir = directory_path + tracker
        if not os.path.exists(destdir):
            os.makedirs(destdir)
        # file handle is closed by now, so the rename works on Windows
        os.rename(eachfile, os.path.join(destdir + "\\" + torrentfilename))
def main():
    """Fetch what.cd torrent JSON by ID for every entry in the
    ID+Hash+Filename list, saving each response as a file named after its
    infohash. Skips hashes already downloaded, so it is resumable."""
    ss = Preferences()
    # to resume a broken download: set this to the last SUCCESSFUL number
    # (due to 1 starting at 0) that you see was outputted to console
    currentline = 0
    try:
        # cookies speed up the HTTP (supposedly)
        with open(ss.getwpath("cookiesfile"), 'rb') as cookiefile:
            cookies = pickle.load(cookiefile)
    except (IOError, OSError, pickle.PickleError):
        cookies = None  # if we can't load it, don't use it
    # store credentials in another file and .git-ignore it
    with open(ss.getwpath("credentialsfile"), 'rb') as credfile:
        credentials = credfile.readlines()
    username = credentials[0].strip()
    password = credentials[1].strip()
    apihandle = whatapi.WhatAPI(config_file=None, username=username,
                                password=password, cookies=cookies)
    filenamewithIDs = ss.getwpath("outpath1")  # ("1seeding_ID+Hash+Filename.txt")
    hashdir = ss.getwpath("script2destdir")    # output dir
    with open(filenamewithIDs, 'r', encoding='utf-8') as listfile:
        openedfile = listfile.readlines()
    # islice lets the run continue where it left off
    for eachline in islice(openedfile, currentline, None):
        idandhash = eachline.strip().split(' / ')
        currentID = idandhash[0]
        currentHash = idandhash[1]
        if not os.path.exists(os.path.join(hashdir, currentHash)):
            # currentHash = "E7A5718EC52633FCCB1EA85656AA0622543994D7"  # test hash for debugging
            try:
                # talk to server and receive a response (1.75 s throttle)
                response = apihandle.request(1.75, "torrent", id=currentID)["response"]
            except whatapi.RequestException:
                currentline += 1
                print(currentline, " ERROR. Your search did not match anything.")
                continue
            with open(os.path.join(hashdir, currentHash), 'w') as outfile:
                json.dump(response, outfile, sort_keys=True)
            # NOTE(review): counter/print kept inside the not-exists branch as
            # in the original flow — confirm already-present hashes should not
            # advance the console numbering.
            currentline += 1
            print(currentline, ": ", currentID)
    # store cookies when script ends, for next-run
    with open(ss.getwpath("cookiesfile"), 'wb') as cookiefile:
        pickle.dump(apihandle.session.cookies, cookiefile)
    print("Download Complete.")
def main(): ss = Preferences() currentline = ( 0 ) # to resume a broken download. set this to the last SUCCESSFUL number (due to 1 starting at 0) that you see was outputted to console try: cookies = pickle.load(open(ss.getwpath("cookiesfile"), "rb")) # cookies speed up the HTTP (supposedly) except: cookies = None # if we cant load it, don't use it. credentials = open( ss.getwpath("credentialsfile"), "rb" ).readlines() # store credentials in another file and .git-ignore it username = credentials[0].strip() password = credentials[1].strip() apihandle = whatapi.WhatAPI(config_file=None, username=username, password=password, cookies=cookies) filenamewithIDs = ss.getwpath("outpath1") # ("1seeding_ID+Hash+Filename.txt") hashdir = ss.getwpath("script2destdir") # output dir openedfile = open(filenamewithIDs, "r").readlines() for eachline in islice(openedfile, currentline, None): # will continue where it left off idandhash = eachline.strip().split(" / ") currentID = idandhash[0] currentHash = idandhash[1] if not os.path.exists(os.path.join(hashdir, currentHash)): # currentHash = "E7A5718EC52633FCCB1EA85656AA0622543994D7" #test hash for debugging try: response = apihandle.request(0, "torrent", hash=currentHash)[ "response" ] # talk to server and receive a response. the 0 means time.sleep(0). except whatapi.RequestException as e: currentline += 1 print currentline, " ERROR. Your search did not match anything." continue with open(os.path.join(hashdir, currentHash), "w") as outfile: json.dump(response, outfile, sort_keys=True) currentline += 1 print currentline, ": ", currentHash pickle.dump( apihandle.session.cookies, open(ss.getwpath("cookiesfile"), "wb") ) # store cookies when script ends, for next-run. print "Download Complete."
def main(): ss = Preferences() script1sourcedir = ss.getwpath( u"script1sourcedir" ) + u'' #("seeding\"), needs unicode u for file opening. files = [ os.path.join(script1sourcedir, filename) for filename in next(os.walk(script1sourcedir))[2] ] #gives absolute paths + names currentfile = 0 container = [ ] #set up an empty container for desired data to get put into for later for eachfile in files: metainfo = bencode.decode_from_file(eachfile) # #need to manually SHA1 hash the torrent file's info-dict to get the info-hash infodict = metainfo['info'] info_hash = hashlib.sha1(bencode.bencode(infodict)).hexdigest().upper() internalname = infodict['name'] torrentfilename = eachfile[eachfile.rfind("\\") + 1:] locextension = torrentfilename.find( ".torrent") #location of extension (char position) locid = torrentfilename.rfind( "-") + 1 #location of torrentID (char position) torrentid = torrentfilename[locid:locextension] #grab torrentID torrentfilename = torrentfilename[:locid - 1] #####-------------replace banned characters with unicode section-----------------###### ### # Forward slashes are strange. "FullWidth" is very wide and would be too wide if theres already spaces around it. 
torrentfilename = torrentfilename.replace( " / ", u"/") # U+FFOF (wide) FULLWIDTH SOLIDUS # "Division" slash is too narrow and needs spaces inserted surrounding it (and is still less width than the fullwidth) torrentfilename = torrentfilename.replace( "/", u" ∕ ") # U+2215 (narrow) DIVISION SLASH # Backslash (requires two slashes in python) torrentfilename = torrentfilename.replace( "\\", u"\") # U+FF3C FULLWIDTH REVERSE SOLIDUS # Colon torrentfilename = torrentfilename.replace( ":", u"꞉") # U+A789 MODIFIER LETTER COLON # asterisk torrentfilename = torrentfilename.replace( "*", u"※") # U+203B REFERENCE MARK # question mark (replacement is backwards, sorry) torrentfilename = torrentfilename.replace( "?", u"؟") # U+061F ARABIC QUESTION MARK # Double-quote torrentfilename = torrentfilename.replace( '"', u"ʺ") # U+02BA MODIFIER LETTER DOUBLE PRIME # Left angle bracket torrentfilename = torrentfilename.replace( "<", u"˂") # U+02C2 MODIFIER LETTER LEFT ARROWHEAD # right angle bracket torrentfilename = torrentfilename.replace( ">", u"˃") # U+02C3 MODIFIER LETTER RIGHT ARROWHEAD # Pipe torrentfilename = torrentfilename.replace( "|", u"ǀ") # U+01C0 LATIN LETTER DENTAL CLICK ### #####----------windows filename banned chars replacement with unicode-----------###### container.append([torrentfilename, internalname, info_hash, torrentid]) currentfile += 1 print currentfile, torrentfilename.encode('ascii', errors='ignore') print "\nReminder: Console output is ascii only, Cannot Print Unicode. (chars omitted)" ##File Output. The Master List file of everything.## # when the loop exits, Sort it, and write it to the file. container.sort() writelistfile = codecs.open( ss.getwpath("outpath3"), 'wb', "utf-8" ) # write-out a text file with one entry per line. main output file (3propernames.txt) for eachline in container: writelistfile.write(eachline[0] + " / " + eachline[2] + "\n") #torrentname / infohash writelistfile.close() print "Completed. 
Unicode File Written to: ", os.path.basename( ss.getwpath("outpath3"))
def _calc_terminal_scores(self, w):
    """ Calculate the score for each possible terminal/token match

        Returns a dict mapping each token index in the span of the tree w
        to a { terminal: score } dict of that token's candidate terminals.
        Higher scores mark more plausible matches; the caller uses this
        (partial) ordering to prune ambiguous parse options.
    """
    # First pass: for each token, find the possible terminals that
    # can correspond to that token
    finals = defaultdict(set)  # token index -> set of candidate terminals
    tokens = dict()            # token index -> token object
    self._find_options(w, finals, tokens)
    # Second pass: find a (partial) ordering by scoring the terminal alternatives for each token
    scores = dict()
    # Loop through the indices of the tokens spanned by this tree
    for i in range(w.start, w.end):
        s = finals[i]
        # Initially, each alternative has a score of 0
        scores[i] = {terminal: 0 for terminal in s}
        #print("Reducing token '{0}'; scores dict initialized to:\n{1}".format(tokens[i].t1, scores[i]))
        if len(s) <= 1:
            # No ambiguity to resolve here
            continue
        # More than one terminal in the option set for the token at index i
        # Calculate the relative scores
        # Find out whether the first part of all the terminals are the same
        same_first = len(set(terminal.first for terminal in s)) == 1
        txt = tokens[i].lower
        # No need to check preferences if the first parts of all possible terminals are equal
        # Look up the preference ordering from Reynir.conf, if any
        prefs = None if same_first else Preferences.get(txt)
        found_pref = False
        sc = scores[i]
        if prefs:
            # Collect the most extreme adjustment for each terminal across all
            # applicable (worse, better, factor) preference triples, then apply
            # each terminal's single net adjustment once.
            adj_worse = defaultdict(int)
            adj_better = defaultdict(int)
            for worse, better, factor in prefs:
                for wt in s:
                    if wt.first in worse:
                        for bt in s:
                            if wt is not bt and bt.first in better:
                                if bt.name[0] in "\"'":
                                    # Literal terminal: be even more aggressive in promoting it
                                    adj_w = -2 * factor
                                    adj_b = +6 * factor
                                else:
                                    adj_w = -2 * factor
                                    adj_b = +4 * factor
                                adj_worse[wt] = min(adj_worse[wt], adj_w)
                                adj_better[bt] = max(adj_better[bt], adj_b)
                                found_pref = True
            for wt, adj in adj_worse.items():
                #print("Token '{2}': Adjusting score of terminal '{0}' by {1}".format(wt, adj, txt))
                sc[wt] += adj
            for bt, adj in adj_better.items():
                #print("Token '{2}': Adjusting score of terminal '{0}' by {1}".format(bt, adj, txt))
                sc[bt] += adj
        #if not same_first and not found_pref:
        #    # Only display cases where there might be a missing pref
        #    print("Token '{0}' has {1} possible terminal matches: {2}".format(txt, len(s), s))
        # Apply heuristics to each terminal that potentially matches this token
        for t in s:
            tfirst = t.first
            if tfirst == "ao" or tfirst == "eo":
                # Subtract from the score of all ao and eo
                sc[t] -= 1
            elif tfirst == "no":
                if t.is_singular:
                    # Add to singular nouns relative to plural ones
                    sc[t] += 1
                elif t.is_abbrev:
                    # Punish abbreviations in favor of other more specific terminals
                    sc[t] -= 1
            elif tfirst == "fs":
                if t.has_variant("nf"):
                    # Reduce the weight of the 'artificial' nominative prepositions
                    # 'næstum', 'sem', 'um'
                    sc[t] -= 5  # Make other cases outweigh the Nl_nf bonus of +4 (-2 -3 = -5)
                elif txt == "við" and t.has_variant("þgf"):
                    sc[t] += 1  # Smaller bonus for við + þgf (is rarer than við + þf)
                elif txt == "sem" and t.has_variant("þf"):
                    sc[t] -= 6  # Even less attractive than sem_nf
                else:
                    # Else, give a bonus for each matched preposition
                    sc[t] += 2
            elif tfirst == "so":
                if t.variant(0) in "012":
                    # Consider verb arguments
                    # Normally, we give a bonus for verb arguments: the more matched, the better
                    numcases = int(t.variant(0))
                    adj = 2 * numcases
                    # !!! Logic should be added here to encourage zero arguments for verbs in 'miðmynd'
                    if numcases == 0:
                        # Zero arguments: we might not like this
                        if all((m.stofn not in VerbObjects.VERBS[0]) and ("MM" not in m.beyging)
                               for m in tokens[i].t2 if m.ordfl == "so"):
                            # No meaning where the verb has zero arguments
                            adj = -5
                    # Apply score adjustments for verbs with particular object cases,
                    # as specified by $score(n) pragmas in Verbs.conf
                    # In the (rare) cases where there are conflicting scores,
                    # apply the most positive adjustment
                    adjmax = 0
                    for m in tokens[i].t2:
                        if m.ordfl == "so":
                            key = m.stofn + t.verb_cases
                            score = VerbObjects.SCORES.get(key)
                            if score is not None:
                                adjmax = score
                                break
                    sc[t] += adj + adjmax
                if t.is_sagnb:
                    # We like sagnb and lh, it means that more
                    # than one piece clicks into place
                    sc[t] += 6
                elif t.is_lh:
                    # sagnb is preferred to lh, but vb (veik beyging) is discouraged
                    if t.has_variant("vb"):
                        sc[t] -= 2
                    else:
                        sc[t] += 3
                elif t.is_mm:
                    # Encourage mm forms. The encouragement should be better than
                    # the score for matching a single case, so we pick so_0_mm
                    # rather than so_1_þgf, for instance.
                    sc[t] += 3
                elif t.is_vh:
                    # Encourage vh forms
                    sc[t] += 2
                if t.is_subj:
                    # Give a small bonus for subject matches
                    if t.has_variant("none"):
                        # ... but a punishment for subj_none
                        sc[t] -= 3
                    else:
                        sc[t] += 1
                if t.is_nh:
                    if (i > 0) and any(pt.first == 'nhm' for pt in finals[i - 1]):
                        # Give a bonus for adjacent nhm + so_nh terminals
                        sc[t] += 4  # Prop up the verb terminal with the nh variant
                        for pt in scores[i - 1].keys():
                            if pt.first == 'nhm':
                                # Prop up the nhm terminal
                                scores[i - 1][pt] += 2
                                # print("Propping up nhm for verb {1}, score is now {0}".format(scores[i-1][pt], tokens[i].t1))
                                break
                    if any(pt.first == "no" and pt.has_variant("ef") and pt.is_plural for pt in s):
                        # If this is a so_nh and an alternative no_ef_ft exists, choose this one
                        # (for example, 'hafa', 'vera', 'gera', 'fara', 'mynda', 'berja', 'borða')
                        sc[t] += 4
            elif tfirst == "tala" or tfirst == "töl":
                # A complete 'töl' or 'no' is better (has more info) than a rough 'tala'
                if tfirst == "tala":
                    sc[t] -= 1
                # Discourage possessive ('ef') meanings for numbers
                for pt in s:
                    if (pt.first == "no" or pt.first == "töl") and pt.has_variant("ef"):
                        sc[pt] -= 1
            elif tfirst == "sérnafn":
                if not tokens[i].t2:
                    # If there are no BÍN meanings, we had no choice but to use sérnafn,
                    # so alleviate some of the penalty given by the grammar
                    sc[t] += 2
                else:
                    # BÍN meanings are available: discourage this
                    #print("sérnafn '{0}': BÍN meanings available, discouraging".format(tokens[i].t1))
                    sc[t] -= 6
                    if i == w.start:
                        # First token in sentence, and we have BÍN meanings:
                        # further discourage this
                        sc[t] -= 4
                    #print("Meanings for sérnafn {0}:".format(tokens[i].t1))
                    #for m in tokens[i].t2:
                    #    print("{0}".format(m))
                    #    if m.stofn[0].isupper():
                    #        sc[t] -= 4 # Discourage 'sérnafn' if an uppercase BÍN meaning is available
                    #        break
            elif t.name[0] in "\"'":
                # Give a bonus for exact or semi-exact matches
                sc[t] += 1
    #for i in range(w.start, w.end):
    #    print("At token '{0}' scores dict is:\n{1}".format(tokens[i].t1, scores[i]))
    return scores
def go_with_score(self, forest):
    """ Returns the argument forest after pruning it down to a single tree

        Scores every candidate terminal for each token in the forest's span
        (same two-pass scheme as _calc_terminal_scores, with somewhat
        different weights), then delegates to self._reduce to eliminate
        lower-rated subtrees. Returns a (tree, score) tuple;
        (None, 0) when forest is None.
    """
    if forest is None:
        return (None, 0)
    w = forest
    # First pass: for each token, find the possible terminals that
    # can correspond to that token
    finals = defaultdict(set)  # token index -> set of candidate terminals
    tokens = dict()            # token index -> token object
    self._find_options(w, finals, tokens)
    # Second pass: find a (partial) ordering by scoring the terminal alternatives for each token
    scores = dict()
    # Loop through the indices of the tokens spanned by this tree
    for i in range(w.start, w.end):
        s = finals[i]
        # Initially, each alternative has a score of 0
        scores[i] = { terminal: 0 for terminal in s }
        if len(s) > 1:
            # More than one terminal in the option set
            # Calculate the relative scores
            # Find out whether the first part of all the terminals are the same
            same_first = len(set(x.first for x in s)) == 1
            txt = tokens[i].lower
            # No need to check preferences if the first parts of all possible terminals are equal
            # Look up the preference ordering from Reynir.conf, if any
            prefs = None if same_first else Preferences.get(txt)
            found_pref = False
            sc = scores[i]
            if prefs:
                # Unlike _calc_terminal_scores, adjustments here are applied
                # directly and cumulatively for every matching preference triple
                for worse, better, factor in prefs:
                    for wt in s:
                        if wt.first in worse:
                            for bt in s:
                                if wt is not bt and bt.first in better:
                                    if bt.name[0] in "\"'":
                                        # Literal terminal: be even more aggressive in promoting it
                                        sc[wt] -= 2 * factor
                                        sc[bt] += 6 * factor
                                    else:
                                        sc[wt] -= 2 * factor
                                        sc[bt] += 4 * factor
                                    found_pref = True
            #if not same_first and not found_pref:
            #    # Only display cases where there might be a missing pref
            #    print("Token '{0}' has {1} possible terminal matches: {2}".format(txt, len(s), s))
            # Apply heuristics to each terminal that potentially matches this token
            for t in s:
                tfirst = t.first
                if tfirst == "ao" or tfirst == "eo":
                    # Subtract from the score of all ao and eo
                    sc[t] -= 1
                elif tfirst == "no":
                    if t.is_singular:
                        # Add to singular nouns relative to plural ones
                        sc[t] += 1
                    elif t.is_abbrev:
                        # Punish abbreviations in favor of other more specific terminals
                        sc[t] -= 1
                elif tfirst == "fs":
                    if t.has_variant("nf"):
                        # Reduce the weight of the 'artificial' nominative prepositions
                        # 'næstum', 'sem', 'um'
                        sc[t] -= 3  # Make other cases outweigh the Nl_nf bonus of +4 (-2 -3 = -5)
                    else:
                        # Else, give a bonus for each matched preposition
                        sc[t] += 2
                elif tfirst == "so":
                    if t.variant(0) in "012":
                        # Consider verb arguments
                        # Normally, we give a bonus for verb arguments: the more matched, the better
                        adj = 2 * int(t.variant(0))
                        # !!! Logic should be added here to encourage zero arguments for verbs in 'miðmynd'
                        if adj == 0:
                            # Zero arguments: we might not like this
                            for m in tokens[i].t2:
                                if m.ordfl == "so" and m.stofn not in VerbObjects.VERBS[0]:
                                    # We're using a verb with zero arguments but that form is not
                                    # explicitly listed in Verbs.conf: discourage this
                                    # print("Discouraging zero-arg use of verb '{0}' (stem '{1}')".format(txt, m.stofn))
                                    adj = -1
                                    break
                        sc[t] += adj
                    if t.is_sagnb:
                        # We like sagnb and lh, it means that more
                        # than one piece clicks into place
                        sc[t] += 4
                    elif t.is_lh:
                        # sagnb is preferred to lh, but vb (veik beyging) is discouraged
                        if t.has_variant("vb"):
                            sc[t] -= 2
                        else:
                            sc[t] += 3
                    if t.is_subj:
                        # Give a small bonus for subject matches
                        if t.has_variant("none"):
                            # ... but a punishment for subj_none
                            sc[t] -= 3
                        else:
                            sc[t] += 1
                    if t.is_nh:
                        if (i > 0) and any(pt.first == 'nhm' for pt in finals[i - 1]):
                            # Give a bonus for adjacent nhm + so_nh terminals
                            sc[t] += 2
                            # Prop up the verb terminal with the nh variant
                            # NOTE(review): no break here, so every nhm terminal
                            # at i-1 gets propped up — confirm this is intended
                            # (the sibling _calc_terminal_scores breaks after one)
                            for pt in scores[i - 1].keys():
                                if pt.first == 'nhm':
                                    # Prop up the nhm terminal
                                    scores[i - 1][pt] += 2
                        if any(pt.first == "no" and pt.has_variant("ef") and pt.is_plural for pt in s):
                            # If this is a so_nh and an alternative no_ef_ft exists, choose this one
                            # (for example, 'hafa', 'vera', 'gera', 'fara', 'mynda', 'berja', 'borða')
                            sc[t] += 2
                elif tfirst == "tala" or tfirst == "töl":
                    # A complete 'töl' or 'no' is better (has more info) than a rough 'tala'
                    if tfirst == "tala":
                        sc[t] -= 1
                    # Discourage possessive ('ef') meanings for numbers
                    for pt in s:
                        if (pt.first == "no" or pt.first == "töl") and pt.has_variant("ef"):
                            sc[pt] -= 1
                elif tfirst == "sérnafn":
                    if tokens[i].t2:
                        sc[t] -= 20  # Base penalty is -20
                        for m in tokens[i].t2:
                            sc[t] -= 1  # Subtract one for each BÍN meaning available
                            if m.stofn[0].isupper():
                                sc[t] -= 8  # Heavily discourage 'sérnafn' if an uppercase BÍN meaning is available
                elif t.name[0] in "\"'":
                    # Give a bonus for exact or semi-exact matches
                    sc[t] += 1
    # Third pass: navigate the tree bottom-up, eliminating lower-rated
    # options (subtrees) in favor of higher rated ones
    score = self._reduce(w, scores)
    return (w, score)
def main(): global fmttdcatalogueNumber #to fix an issue with scope (line 330,333). ss = Preferences() hashtorrlistfile = ss.getwpath( "outpath1") #("1seeding_ID+Hash+Filename.txt") directory_path = ss.getwpath("script2destdir") #as source dir (hash-grabs) allfiles = [ os.path.join(directory_path, filename) for filename in next(os.walk(directory_path))[2] ] # gives absolute paths + names hashtofilenamefolder = ss.getwpath( "script3destdir") #as dest dir (hash-grabs-as-filenames) writelistfile = codecs.open( ss.getwpath("outpath3"), 'wb', "utf-8" ) # write-out a text file with one entry per line. main output file (3propernames.txt) writelistcontainer = [] currentfilenumber = 1 for hashidfilename in allfiles: # iterate through filenames of what.cd JSON data with open(hashidfilename, 'r') as stringfile: # open them needFixLabeltoNewEdition = False jsonresponse = json.load(stringfile) tor = Torrent(jsonresponse) if tor.group.categoryName != "Music": continue # do not continue altering any non-music torrents. releaseTypeName = ReleaseType( tor.group.releaseType ).name # turn int. value into a string using the enum class above fmttdreleaseTypeName = "[" + releaseTypeName + "]" if tor.torrent.remastered: if tor.torrent.remasterTitle: tor.group.name += " (" + tor.torrent.remasterTitle + ")" if tor.torrent.remasterYear > tor.group.year: tor.group.year = tor.torrent.remasterYear if tor.torrent.remasterRecordLabel: if tor.group.recordLabel.lower( ) != tor.torrent.remasterRecordLabel.lower( ): # so not case sensitive if not tor.group.recordLabel: tor.group.recordLabel = tor.torrent.remasterRecordLabel else: # then things get complicated and we need to figure out which Label/catalog field is the best one to use, or combine them or both needFixLabeltoNewEdition = True # if its been determined that its a remaster (new edition), process new label and catalog # checking whether to combine with old, or which one to use, etc, etc..... 
if needFixLabeltoNewEdition == True: score = difflib.SequenceMatcher( None, tor.group.recordLabel.lower(), tor.torrent.remasterRecordLabel.lower()).ratio() if (score < 0.5): # considered similar at 0.6 but this way is not that accurate. if they are lower than 0.5 similar, just use the new one tor.group.recordLabel = tor.torrent.remasterRecordLabel if tor.torrent.remasterCatalogueNumber: tor.group.catalogueNumber = tor.torrent.remasterCatalogueNumber # if tor.torrent.remasterCatalogueNumber is not blank, use it as the new tor.group.catalogueNumber elif all(word in tor.torrent.remasterRecordLabel.lower() for word in tor.group.recordLabel.lower()) \ or \ all(word in tor.group.recordLabel.lower() for word in tor.torrent.remasterRecordLabel.lower()): # If all the words in the old label is encompassed in the new one, use the new one. # This would mean the new edition record label is most likely longer and is similar enough to use that, # and preferred, since its more applicable to this specific release anyway. # Even if the reverse is true, this code-block should only catch labels that differ in slight ways.(?) # example 1: originallabel={Big Beat Records} remasterlabel={Big Beat} #elif new label in old label # result: {Big Beat} #or # example 2: originallabel={Big Beat} remasterlabel={Big Beat Records} #if old label in new label # result: {Big Beat Records} #then tor.group.recordLabel = tor.torrent.remasterRecordLabel # always choose new label. if tor.torrent.remasterCatalogueNumber: tor.group.catalogueNumber = tor.torrent.remasterCatalogueNumber # if tor.torrent.remasterCatalogueNumber is not blank, use it as the new tor.group.catalogueNumber else: # This else-block is used when the above are not true, and we can't decide on which one to use, so we use both. Combined. 
# example 1: originallabel={Island Records} remasterlabel={Island Records / Lokal Legend} # result: {Island Records / Lokal Legend} # example 2: originallabel {Wall Recordings} remasterlabel={Tiger Records} # result: {Wall Recordings / Tiger Records} new = "" splitorig = re.sub( "[(,),-]", " ", tor.group.recordLabel).split( ) # remove delimiter chars that mess up stuff sepchar = ["/"] splitnew = re.sub( "[(,),-]", " ", tor.torrent.remasterRecordLabel ).split() # turn everything into a list new = " ".join([ "%s" % (v) for v in getUniqueWords(splitorig + sepchar + splitnew) ]) # append unique words to the orig. if tor.torrent.remasterCatalogueNumber: if tor.group.catalogueNumber != tor.torrent.remasterCatalogueNumber: if tor.group.catalogueNumber: tor.group.catalogueNumber += " / " + tor.torrent.remasterCatalogueNumber else: tor.group.catalogueNumber = tor.torrent.remasterCatalogueNumber tor.group.recordLabel = new # ntpath.basename was really slow so doing it manually.... (32 times faster)= 0.128 seconds vs 0.004 seconds # whats happening here is due to an exponential nested for loop, ie: 4129 results^2 = 17 million function calls of either basename or .rfind('\\') # there should be a better way to do this. 
hashfilesepidloc = hashidfilename.rfind("\\") + 1 cmphashfn = hashidfilename[hashfilesepidloc:] iterhashfile = open( hashtorrlistfile, 'rb').readlines() # read everything into memory for i in iterhashfile: # read line splitline = i.strip().split( ' / ') # 0 torrentID / 1 Hash / 2 torrentfilename if splitline[ 1] == cmphashfn: # if it matches, start processing newEntry = TorrentEntry() # instanciate class newEntry.hash = splitline[1] # store Hash for reference newEntry.pathname = splitline[2].decode( "utf-8") # filename + extension locextension = newEntry.pathname.find( ".torrent") # location of extension locid = newEntry.pathname.rfind( "-") + 1 # location of tor.torrent.id newEntry.filename = newEntry.pathname[: locextension] # chop the extension off (manually) newEntry.artistalbum = newEntry.filename[:locid - 1] # JUST the name (no ID#) newEntry.torrentid = newEntry.filename[ locid: locextension] # grab ID for future reference (tor.torrent.id on what.cd) # example : S-Type - Billboard (Lido Remix) - 2014 (WEB - MP3 - 320) newEntry.artist = newEntry.artistalbum[:newEntry. 
artistalbum.find( " - " )] # grab artist tempalbum = newEntry.artistalbum[ newEntry.artistalbum.find(" - ") + 3:] # temp value helps with string processing newEntry.album = tempalbum[:tempalbum.find( " - " )] # not needed since it can be pulled from [group] newEntry.year = tempalbum[ tempalbum.find(" - ") + 3:tempalbum.find(" - ") + 7] # not needed since it can be pulled from [group] # ------------Recreate name------------# # -------Special RULES SECTION---------# newEntry.createdpropername = newEntry.artist + u" - " + tor.group.name + " " if tor.group.releaseType > 1: # dont put it for Album or Unspecified if tor.group.releaseType != 5: # do something different for EP newEntry.createdpropername += fmttdreleaseTypeName + " " else: # make a rule so [EP] doesnt come up if there is " EP " already if tor.group.name[-2:] != "EP": newEntry.createdpropername += fmttdreleaseTypeName + " " newEntry.createdpropername += "(" + str( tor.group.year) + ")" # written like this for easy humanreading # format = MP3, FLAC, AAC, # media = cd, web, vinyl, soundboard, dat # encoding = lossless,320,v0,256,v2,192 if tor.torrent.format == "FLAC": newEntry.fmttdMediaEncodeFormat = "FLAC" # log and logscore only applicable to flac. 
if tor.torrent.hasLog: newEntry.fmttdMediaEncodeFormat += " " + str( tor.torrent.logScore ) + "%" # the % implies "log" so leave out the word log if tor.torrent.format == "AAC": if (any("itunes" in word.lower() for word in tor.torrent.description.split()) ) or (any( "itunes" in word.lower() for word in tor.torrent.filePath.split())): newEntry.fmttdMediaEncodeFormat = "iTunes " newEntry.fmttdMediaEncodeFormat += "AAC" if tor.torrent.format == "MP3": # dont actually write mp3 # only write Scene or WEB if it is an mp3 if tor.torrent.scene: newEntry.fmttdMediaEncodeFormat += "Scene" elif tor.torrent.media == "WEB": newEntry.fmttdMediaEncodeFormat += "WEB" else: newEntry.fmttdMediaEncodeFormat += tor.torrent.media if "VBR" in tor.torrent.encoding: newEntry.fmttdMediaEncodeFormat += " " + tor.torrent.encoding[: 2] else: newEntry.fmttdMediaEncodeFormat += " " + tor.torrent.encoding newEntry.fmttdMediaEncodeFormat = "[" + newEntry.fmttdMediaEncodeFormat + "]" newEntry.createdpropername += " " + newEntry.fmttdMediaEncodeFormat # put catalog number in brackets if tor.group.catalogueNumber: fmttdcatalogueNumber = ("[" + tor.group.catalogueNumber + "]").replace(" ", "").upper() elif tor.group.recordLabel: fmttdcatalogueNumber = "[" + tor.group.recordLabel + "]" # combines with next part to put recordLabel in the front if Cat# missing if any(word in tor.group.recordLabel.lower() for word in Sorted_Record_Labels_List): # so not case sensitive newEntry.createdpropername = fmttdcatalogueNumber + " " + newEntry.createdpropername elif tor.group.recordLabel: newEntry.createdpropername += " " + "{" + tor.group.recordLabel + "}" # if tor.group.catalogueNumber: #This will put [CATA###] after all releases, even labels not in your list # newEntry.createdpropername += " " + fmttdcatalogueNumber #Gets kind of cumbersome for me. 
(uncomment to use it anyway) # these 2 lines are a quick fix, for an oversight in my naming process # if these are single file .mp3's (or a single .flac) they will need a .mp3 at the end of the filename if not tor.torrent.filePath: newEntry.createdpropername += "." + tor.torrent.format.lower( ) try: print currentfilenumber, newEntry.createdpropername.encode( 'ascii', errors='ignore') except: print "COULD NOT PRINT UNICODE FILENAME TO CONSOLE. HASH=", tor.torrent.infoHash ########-------------replace characters section----------------######### newEntry.createdpropername = newEntry.createdpropername.replace( "\\", u"\") # U+FF3C FULLWIDTH REVERSE SOLIDUS # these forward slashes are strange. "FullWidth" is very wide and would be too wide if theres already spaces around it. newEntry.createdpropername = newEntry.createdpropername.replace( " / ", u"/") # U+FFOF (wide) FULLWIDTH SOLIDUS # "Division" slash is too narrow and needs spaces inserted surrounding it (and is still less width than the fullwidth) newEntry.createdpropername = newEntry.createdpropername.replace( "/", u" ∕ ") # U+2215 (narrow) DIVISION SLASH newEntry.createdpropername = newEntry.createdpropername.replace( ":", u"꞉") # U+A789 MODIFIER LETTER COLON newEntry.createdpropername = newEntry.createdpropername.replace( "*", u"※") # U+203B REFERENCE MARK newEntry.createdpropername = newEntry.createdpropername.replace( "?", u"؟") # U+061F ARABIC QUESTION MARK newEntry.createdpropername = newEntry.createdpropername.replace( '"', u"ʺ" ) # U+02BA MODIFIER LETTER DOUBLE PRIME newEntry.createdpropername = newEntry.createdpropername.replace( "<", u"˂" ) # U+02C2 MODIFIER LETTER LEFT ARROWHEAD newEntry.createdpropername = newEntry.createdpropername.replace( ">", u"˃" ) # U+02C3 MODIFIER LETTER RIGHT ARROWHEAD newEntry.createdpropername = newEntry.createdpropername.replace( "|", u"ǀ") # U+01C0 LATIN LETTER DENTAL CLICK #####--windows filename banned chars replacement with unicode--######### 
######----------HashGrabs-as-Filenames--------######## # File output. Move all files named as hashes to a new dir as the proper name if not os.path.exists(hashtofilenamefolder + newEntry.createdpropername): shutil.copy( hashidfilename, hashtofilenamefolder + newEntry.createdpropername) currentfilenumber += 1 #####------------make propernames.txt (has the hash in it also) ---------######## # Add it to the container (since this is in a loop) writelistcontainer.append(newEntry.createdpropername + " / " + tor.torrent.infoHash + "\n") ##File Output. The Master List file of everything.## # when the loop exits, Sort it, and write it to the file. writelistcontainer.sort() for eachline in writelistcontainer: writelistfile.write(eachline) writelistfile.close()
def main():
    """Rewrite uTorrent's resume.dat so renamed torrent data keeps seeding.

    Reads the "proper name / hash" list (outpath3), computes old->new path pairs,
    writes them to a review file (outpath4), pauses so the user can hand-edit it,
    then renames the files on disk and patches path/caption/targets in the
    decoded resume.dat, writing the result to NEWDAT.dat.

    Side effects: renames files on disk, writes two files, blocks on input().
    """
    ss = Preferences()
    newfile = open(os.path.join(ss.get("maindir"), "NEWDAT.dat"), 'wb')
    # ("3propernames.txt") — one "propername / hash" entry per line
    with open(ss.getwpath("outpath3"), 'r', encoding='utf-8') as f:
        namesandhashfile = f.readlines()
    # Holds the intermediate changes to happen before actually renaming,
    # so you have a chance to edit/change it. (4beforepath-afterpath.txt)
    beforeafterpath = ss.getwpath("outpath4")

    # torrentlist = decoder.decode_from_file(ss.get("utresumedat"))      # works 10.645s 12315181 function calls
    # torrentlist = bencode2en.decode_from_file(ss.get("utresumedat"))   # works  8.462s 13745202 function calls
    torrentlist = bencode.decode_from_file(ss.get("utresumedat"))        # works  8.057s 10908143 function calls

    # These two non-torrent root keys interfere with the dict comprehension
    # below (it expects every root value to be a per-torrent dict).
    torrentlist.pop(b".fileguard", None)
    rec = torrentlist.pop(b"rec", None)

    # Reverse lookup: uppercase-hex infohash -> [resume.dat key, caption, path]
    reverselookup = {
        base64.b16encode(value[b"info"]): [key, value[b"caption"], value[b"path"]]
        for key, value in torrentlist.items()
    }

    listofbeforeafter = []
    # Pull name + hash out of each line; hash locates the resume.dat entry.
    for eachline in namesandhashfile:
        # strip() removes the \n; " / " is the separator written by script 3
        nameandhash = eachline.strip().split(' / ')
        theNewname = nameandhash[0]
        thehash = nameandhash[1]
        if bytes(thehash, 'utf-8') in reverselookup:
            key = reverselookup[bytes(thehash, 'utf-8')][0]
            theOldPath = torrentlist[key][b"path"].decode('utf-8')
            theNewPath = os.path.join(os.path.dirname(theOldPath), theNewname)
            if theOldPath != theNewPath:
                # [oldpath, newpath, hash] — hash duplicated for error checking
                listofbeforeafter.append([theOldPath, theNewPath, thehash])

    # Sort, then write the review file: oldpath+hash on one line, newpath+hash
    # on the next, so an accidental edit of either hash is detectable below.
    listofbeforeafter.sort()
    with open(beforeafterpath, 'w', encoding='utf-8') as beforeafterfile:
        for eachline in listofbeforeafter:
            beforeafterfile.write(eachline[0] + " / " + eachline[2] + "\n")
            beforeafterfile.write(eachline[1] + " / " + eachline[2] + "\n")

    # Pause so the user can confirm/edit the beforepath-afterpath file.
    input("Press Enter to begin Renaming files.......\\> ")

    # WRITE TORRENT RESUME.DAT
    with open(beforeafterpath, 'r', encoding='utf-8') as f:
        beforeafterfile = f.readlines()
    for i in range(0, len(beforeafterfile), 2):
        beforeandhash = beforeafterfile[i].strip().split(' / ')
        afterandhash = beforeafterfile[i + 1].strip().split(' / ')
        before = beforeandhash[0]
        beforehash = beforeandhash[1]
        after = afterandhash[0]
        afterhash = afterandhash[1]
        if beforehash == afterhash:
            thehash = beforehash
        else:
            print("Error. You have inadvertently modified one of the hash files, and there is a hash mismatch between before/after entries.")
            print("Cannot continue. Exiting. Please save your changes into a new file, locate your error, and re-run and fix it...")
            print("Another possibility is you were missing a / (with 1 character of whitespace on each side surrounding it) as a seperator.")
            # BUGFIX: previously fell through without exiting, reusing a stale
            # (or unbound) hash; abort as the message promises.
            return
        # Hash locates the resume.dat entry to patch.
        if bytes(thehash, 'utf-8') in reverselookup:
            key = reverselookup[bytes(thehash, 'utf-8')][0]
            torrentlist[key][b"caption"] = bytes(after[after.rfind("\\") + 1:], 'utf-8')
            try:
                # Progress numbers correspond to the (every-two-lines) file
                # numbering; console output is ascii-only on Windows, so any
                # unicode chars are omitted from what is printed.
                print(i, before.encode('ascii', errors='ignore').decode())
                print(i + 1, after.encode('ascii', errors='ignore').decode())
                os.rename(before, after)
            except Exception:
                traceback.print_exc()  # report the error but keep going
            torrentlist[key][b"path"] = bytes(after, 'utf-8')
            # Quick fix for a uTorrent oversight: single-file torrents carry a
            # "targets" list that controls the filename; path alone is not enough.
            # (.mp3/.flac only — no single-file .ogg/.aac needed this handling)
            if after.endswith(".mp3") or after.endswith(".flac"):
                if b"targets" in torrentlist[key]:
                    torrentlist[key][b"targets"][0][1] = torrentlist[key][b"caption"]

    # Re-add "rec" (purpose unknown) so nothing breaks — but only if it was
    # actually present; bencoding None would fail. fileguard purposely stays out.
    if rec is not None:
        torrentlist[b"rec"] = rec

    # newfile.write(encode.encode(torrentlist))        # works 10.295s 15361310 function calls
    # newfile.write(bencode2en.bencode2(torrentlist))  # v.slow 31.872s 12452142 function calls
    # newfile.write(bencode2en.bencode4(torrentlist))  # works  7.864s 10906619 function calls
    newfile.write(bencode.bencode(torrentlist))        # works  7.699s 10906619 function calls
    newfile.close()
    print("\nPlease note that the filenames shown are missing any unicode characters due to Windows Command Prompt limitations.")
    print("Finished writing: ", newfile.name)
def main():
    """Patch renamed paths back into uTorrent's resume.dat (-> NEWDAT.dat).

    Flow: read "propername / hash" list (outpath3); emit an editable
    before/after path file (outpath4); wait for user confirmation; rename
    files on disk and update path/caption/targets in the decoded resume.dat.
    """
    ss = Preferences()
    newfile = open(os.path.join(ss.get("maindir"), "NEWDAT.dat"), 'wb')
    # ("3propernames.txt")
    with open(ss.getwpath("outpath3"), 'r', encoding='utf-8') as f:
        namesandhashfile = f.readlines()
    # Intermediate changes file, editable before any renaming happens
    # (4beforepath-afterpath.txt).
    beforeafterpath = ss.getwpath("outpath4")

    # torrentlist = decoder.decode_from_file(ss.get("utresumedat"))      # works 10.645s 12315181 function calls
    # torrentlist = bencode2en.decode_from_file(ss.get("utresumedat"))   # works  8.462s 13745202 function calls
    torrentlist = bencode.decode_from_file(ss.get("utresumedat"))        # works  8.057s 10908143 function calls

    # Remove the two non-dict root keys; the comprehension below assumes
    # every remaining root value is a per-torrent dict.
    torrentlist.pop(b".fileguard", None)
    rec = torrentlist.pop(b"rec", None)

    # Reverse lookup dict: hex infohash -> [resume.dat key, caption, path].
    reverselookup = {
        base64.b16encode(value[b"info"]): [key, value[b"caption"], value[b"path"]]
        for key, value in torrentlist.items()
    }

    listofbeforeafter = []
    for eachline in namesandhashfile:
        # strip() drops the \n; " / " is the separator used throughout.
        nameandhash = eachline.strip().split(' / ')
        theNewname = nameandhash[0]
        thehash = nameandhash[1]
        # If the hash exists in the dict, its entry gives the resume.dat key.
        if bytes(thehash, 'utf-8') in reverselookup:
            key = reverselookup[bytes(thehash, 'utf-8')][0]
            theOldPath = torrentlist[key][b"path"].decode('utf-8')
            theNewPath = os.path.join(os.path.dirname(theOldPath), theNewname)
            if theOldPath != theNewPath:
                listofbeforeafter.append([theOldPath, theNewPath, thehash])

    # Write the before/after review file; the hash is duplicated on both lines
    # so an accidental edit while the user reviews it can be caught below.
    listofbeforeafter.sort()
    with open(beforeafterpath, 'w', encoding='utf-8') as beforeafterfile:
        for eachline in listofbeforeafter:
            beforeafterfile.write(eachline[0] + " / " + eachline[2] + "\n")
            beforeafterfile.write(eachline[1] + " / " + eachline[2] + "\n")

    # Pause here so the user can confirm the changes in the file.
    input("Press Enter to begin Renaming files.......\\> ")

    # WRITE TORRENT RESUME.DAT
    with open(beforeafterpath, 'r', encoding='utf-8') as f:
        beforeafterfile = f.readlines()
    for i in range(0, len(beforeafterfile), 2):
        beforeandhash = beforeafterfile[i].strip().split(' / ')
        afterandhash = beforeafterfile[i + 1].strip().split(' / ')
        before = beforeandhash[0]
        beforehash = beforeandhash[1]
        after = afterandhash[0]
        afterhash = afterandhash[1]
        if beforehash == afterhash:
            thehash = beforehash
        else:
            print("Error. You have inadvertently modified one of the hash files, and there is a hash mismatch between before/after entries.")
            print("Cannot continue. Exiting. Please save your changes into a new file, locate your error, and re-run and fix it...")
            print("Another possibility is you were missing a / (with 1 character of whitespace on each side surrounding it) as a seperator.")
            # BUGFIX: the original printed "Exiting" but never exited, leaving
            # thehash stale or unbound on the next statement. Abort here.
            return
        if bytes(thehash, 'utf-8') in reverselookup:
            key = reverselookup[bytes(thehash, 'utf-8')][0]
            torrentlist[key][b"caption"] = bytes(after[after.rfind("\\") + 1:], 'utf-8')
            try:
                # Numbers match the every-two-lines file numbering; Windows
                # console is not unicode-capable, so unicode chars are omitted.
                print(i, before.encode('ascii', errors='ignore').decode())
                print(i + 1, after.encode('ascii', errors='ignore').decode())
                os.rename(before, after)
            except Exception:
                traceback.print_exc()  # log and continue with the next pair
            torrentlist[key][b"path"] = bytes(after, 'utf-8')
            # Single-file torrents (.mp3/.flac here) keep their filename in a
            # "targets" list; changing path alone is not enough for uTorrent.
            if after.endswith(".mp3") or after.endswith(".flac"):
                if b"targets" in torrentlist[key]:
                    torrentlist[key][b"targets"][0][1] = torrentlist[key][b"caption"]

    # Put "rec" back (unknown purpose) only if it existed — bencoding None
    # would fail. fileguard purposefully stays removed.
    if rec is not None:
        torrentlist[b"rec"] = rec

    # newfile.write(encode.encode(torrentlist))        # works 10.295s 15361310 function calls
    # newfile.write(bencode2en.bencode2(torrentlist))  # v.slow 31.872s 12452142 function calls
    # newfile.write(bencode2en.bencode4(torrentlist))  # works  7.864s 10906619 function calls
    newfile.write(bencode.bencode(torrentlist))        # works  7.699s 10906619 function calls
    newfile.close()
    print("\nPlease note that the filenames shown are missing any unicode characters due to Windows Command Prompt limitations.")
    print("Finished writing: ", newfile.name)
def main():
    """Build 'proper names' from what.cd JSON dumps and copy hash-named files.

    For each JSON file in script2destdir: load the torrent metadata, fold any
    remaster edition info into the group label/catalogue fields, reconstruct a
    human-readable release name, replace Windows-forbidden characters with
    unicode look-alikes, copy the hash-named file to script3destdir under the
    new name, and append "propername / infohash" lines to outpath3.
    """
    global fmttdcatalogueNumber  # to fix an issue with scope (line 330,333)
    ss = Preferences()
    hashtorrlistfile = ss.getwpath("outpath1")
    directory_path = ss.getwpath("script2destdir")  # source dir (hash-grabs)
    # Absolute paths + names of every file in the source dir.
    allfiles = [os.path.join(directory_path, filename)
                for filename in next(os.walk(directory_path))[2]]
    hashtofilenamefolder = ss.getwpath("script3destdir")  # dest dir (hash-grabs-as-filenames)
    # Main output file, one entry per line (3propernames.txt).
    writelistfile = codecs.open(ss.getwpath("outpath3"), 'wb', "utf-8")
    writelistcontainer = []
    currentfilenumber = 1

    # PERF: the hash/torrent list is loop-invariant, so read it into memory
    # once instead of re-reading it for every JSON file (the old nested-loop
    # re-read was the slow part this block's comments complained about).
    with open(hashtorrlistfile, 'r', encoding='utf-8') as f:
        iterhashfile = f.readlines()

    for hashidfilename in allfiles:  # iterate what.cd JSON data files
        with open(hashidfilename, 'r') as stringfile:
            needFixLabeltoNewEdition = False
            jsonresponse = json.load(stringfile)
            tor = Torrent(jsonresponse)
            if tor.group.categoryName != "Music":
                continue  # do not alter any non-music torrents
            # Turn the int release type into a string via the enum class.
            releaseTypeName = ReleaseType(tor.group.releaseType).name
            fmttdreleaseTypeName = "[" + releaseTypeName + "]"

            # Fold remaster (new edition) info into the group fields.
            if tor.torrent.remastered:
                if tor.torrent.remasterTitle:
                    tor.group.name += " (" + tor.torrent.remasterTitle + ")"
                if tor.torrent.remasterYear > tor.group.year:
                    tor.group.year = tor.torrent.remasterYear
                if tor.torrent.remasterRecordLabel:
                    if tor.group.recordLabel.lower() != tor.torrent.remasterRecordLabel.lower():  # not case sensitive
                        if not tor.group.recordLabel:
                            tor.group.recordLabel = tor.torrent.remasterRecordLabel
                        else:
                            # Both labels present and different — decide below
                            # which to use, or combine them.
                            needFixLabeltoNewEdition = True

            if needFixLabeltoNewEdition == True:
                score = difflib.SequenceMatcher(
                    None, tor.group.recordLabel.lower(),
                    tor.torrent.remasterRecordLabel.lower()).ratio()
                if (score < 0.5):
                    # Labels are dissimilar enough: just use the new one.
                    tor.group.recordLabel = tor.torrent.remasterRecordLabel
                    if tor.torrent.remasterCatalogueNumber:
                        tor.group.catalogueNumber = tor.torrent.remasterCatalogueNumber
                elif all(word in tor.torrent.remasterRecordLabel.lower() for word in tor.group.recordLabel.lower()) \
                        or \
                        all(word in tor.group.recordLabel.lower() for word in tor.torrent.remasterRecordLabel.lower()):
                    # One label's words are encompassed by the other's —
                    # e.g. {Big Beat} vs {Big Beat Records} — prefer the new
                    # edition label since it is specific to this release.
                    tor.group.recordLabel = tor.torrent.remasterRecordLabel
                    if tor.torrent.remasterCatalogueNumber:
                        tor.group.catalogueNumber = tor.torrent.remasterCatalogueNumber
                else:
                    # Cannot decide — combine both labels, unique words only.
                    # e.g. {Wall Recordings} + {Tiger Records}
                    #   -> {Wall Recordings / Tiger Records}
                    splitorig = re.sub("[(,),-]", " ", tor.group.recordLabel).split()
                    sepchar = ["/"]
                    splitnew = re.sub("[(,),-]", " ", tor.torrent.remasterRecordLabel).split()
                    new = " ".join(["%s" % (v) for v in getUniqueWords(splitorig + sepchar + splitnew)])
                    if tor.torrent.remasterCatalogueNumber:
                        if tor.group.catalogueNumber != tor.torrent.remasterCatalogueNumber:
                            if tor.group.catalogueNumber:
                                tor.group.catalogueNumber += " / " + tor.torrent.remasterCatalogueNumber
                            else:
                                tor.group.catalogueNumber = tor.torrent.remasterCatalogueNumber
                    tor.group.recordLabel = new

            # Manual basename (much faster than ntpath.basename in this loop).
            hashfilesepidloc = hashidfilename.rfind("\\") + 1
            cmphashfn = hashidfilename[hashfilesepidloc:]
            for line in iterhashfile:
                # 0 torrentID / 1 Hash / 2 torrentfilename
                splitline = line.strip().split(' / ')
                if splitline[1] == cmphashfn:  # match found, start processing
                    newEntry = TorrentEntry()
                    newEntry.hash = splitline[1]       # store hash for reference
                    newEntry.pathname = splitline[2]   # filename + extension
                    locextension = newEntry.pathname.find(".torrent")
                    locid = newEntry.pathname.rfind("-") + 1  # location of tor.torrent.id
                    newEntry.filename = newEntry.pathname[:locextension]  # chop extension
                    newEntry.artistalbum = newEntry.filename[:locid - 1]  # name without ID#
                    newEntry.torrentid = newEntry.filename[locid:locextension]
                    # example: S-Type - Billboard (Lido Remix) - 2014 (WEB - MP3 - 320)
                    newEntry.artist = newEntry.artistalbum[:newEntry.artistalbum.find(" - ")]
                    tempalbum = newEntry.artistalbum[newEntry.artistalbum.find(" - ") + 3:]
                    # album/year not strictly needed (available from [group])
                    newEntry.album = tempalbum[:tempalbum.find(" - ")]
                    newEntry.year = tempalbum[tempalbum.find(" - ") + 3:tempalbum.find(" - ") + 7]

                    # ------------Recreate name------------#
                    # -------Special RULES SECTION---------#
                    newEntry.createdpropername = newEntry.artist + " - " + tor.group.name + " "
                    if tor.group.releaseType > 1:  # omit for Album or Unspecified
                        if tor.group.releaseType != 5:
                            newEntry.createdpropername += fmttdreleaseTypeName + " "
                        else:
                            # Don't add [EP] if the name already ends in "EP".
                            if tor.group.name[-2:] != "EP":
                                newEntry.createdpropername += fmttdreleaseTypeName + " "
                    newEntry.createdpropername += "(" + str(tor.group.year) + ")"

                    # format = MP3, FLAC, AAC / media = cd, web, vinyl, ...
                    # encoding = lossless, 320, v0, 256, v2, 192
                    if tor.torrent.format == "FLAC":
                        newEntry.fmttdMediaEncodeFormat = "FLAC"
                        # log/logScore only apply to FLAC; "%" implies "log".
                        if tor.torrent.hasLog:
                            newEntry.fmttdMediaEncodeFormat += " " + str(tor.torrent.logScore) + "%"
                    if tor.torrent.format == "AAC":
                        if (any("itunes" in word.lower() for word in tor.torrent.description.split())) or (
                                any("itunes" in word.lower() for word in tor.torrent.filePath.split())):
                            newEntry.fmttdMediaEncodeFormat = "iTunes "
                        newEntry.fmttdMediaEncodeFormat += "AAC"
                    if tor.torrent.format == "MP3":
                        # Don't write "MP3"; only Scene/WEB/media for mp3s.
                        if tor.torrent.scene:
                            newEntry.fmttdMediaEncodeFormat += "Scene"
                        elif tor.torrent.media == "WEB":
                            newEntry.fmttdMediaEncodeFormat += "WEB"
                        else:
                            newEntry.fmttdMediaEncodeFormat += tor.torrent.media
                    if "VBR" in tor.torrent.encoding:
                        newEntry.fmttdMediaEncodeFormat += " " + tor.torrent.encoding[:2]
                    else:
                        newEntry.fmttdMediaEncodeFormat += " " + tor.torrent.encoding
                    newEntry.fmttdMediaEncodeFormat = "[" + newEntry.fmttdMediaEncodeFormat + "]"
                    newEntry.createdpropername += " " + newEntry.fmttdMediaEncodeFormat

                    # Catalogue number in brackets; fall back to record label.
                    if tor.group.catalogueNumber:
                        fmttdcatalogueNumber = ("[" + tor.group.catalogueNumber + "]").replace(" ", "").upper()
                    elif tor.group.recordLabel:
                        fmttdcatalogueNumber = "[" + tor.group.recordLabel + "]"
                    # Labels in the user's list go in front of the name.
                    if any(word in tor.group.recordLabel.lower() for word in Sorted_Record_Labels_List):  # not case sensitive
                        newEntry.createdpropername = fmttdcatalogueNumber + " " + newEntry.createdpropername
                    elif tor.group.recordLabel:
                        newEntry.createdpropername += " " + "{" + tor.group.recordLabel + "}"
                    # if tor.group.catalogueNumber:  # would put [CATA###] after ALL releases
                    #     newEntry.createdpropername += " " + fmttdcatalogueNumber

                    # Quick fix: single-file releases need the real extension.
                    if not tor.torrent.filePath:
                        newEntry.createdpropername += "." + tor.torrent.format.lower()

                    try:
                        print(currentfilenumber, newEntry.createdpropername.encode('ascii', errors='ignore').decode())
                    except Exception:
                        print("COULD NOT PRINT UNICODE FILENAME TO CONSOLE. HASH=", tor.torrent.infoHash)

                    ########-------------replace characters section----------------#########
                    # Windows-banned filename chars -> unicode look-alikes.
                    # BUGFIX: the fullwidth replacement targets had been mangled
                    # to plain ASCII "\" and "/" (a syntax error and a no-op that
                    # caused double-processing); restored per the U+ comments.
                    newEntry.createdpropername = newEntry.createdpropername.replace("\\", "＼")   # U+FF3C FULLWIDTH REVERSE SOLIDUS
                    # "FullWidth" solidus is wide, so it absorbs surrounding spaces;
                    newEntry.createdpropername = newEntry.createdpropername.replace(" / ", "／")  # U+FF0F (wide) FULLWIDTH SOLIDUS
                    # the narrower "division" slash needs spaces inserted around it.
                    newEntry.createdpropername = newEntry.createdpropername.replace("/", " ∕ ")   # U+2215 (narrow) DIVISION SLASH
                    newEntry.createdpropername = newEntry.createdpropername.replace(":", "꞉")     # U+A789 MODIFIER LETTER COLON
                    newEntry.createdpropername = newEntry.createdpropername.replace("*", "※")     # U+203B REFERENCE MARK
                    newEntry.createdpropername = newEntry.createdpropername.replace("?", "؟")     # U+061F ARABIC QUESTION MARK
                    newEntry.createdpropername = newEntry.createdpropername.replace('"', "ʺ")     # U+02BA MODIFIER LETTER DOUBLE PRIME
                    newEntry.createdpropername = newEntry.createdpropername.replace("<", "˂")     # U+02C2 MODIFIER LETTER LEFT ARROWHEAD
                    newEntry.createdpropername = newEntry.createdpropername.replace(">", "˃")     # U+02C3 MODIFIER LETTER RIGHT ARROWHEAD
                    newEntry.createdpropername = newEntry.createdpropername.replace("|", "ǀ")     # U+01C0 LATIN LETTER DENTAL CLICK
                    #####--windows filename banned chars replacement with unicode--#########

                    ######----------HashGrabs-as-Filenames--------########
                    # Copy each hash-named file to the dest dir as the proper name.
                    if not os.path.exists(hashtofilenamefolder + newEntry.createdpropername):
                        shutil.copy(hashidfilename, hashtofilenamefolder + newEntry.createdpropername)
                    currentfilenumber += 1
                    #####------------make propernames.txt (has the hash in it also) ---------########
                    writelistcontainer.append(newEntry.createdpropername + " / " + tor.torrent.infoHash + "\n")

    ##File Output. The Master List file of everything.##
    # When the loop exits, sort it and write it to the file.
    writelistcontainer.sort()
    for eachline in writelistcontainer:
        writelistfile.write(eachline)
    writelistfile.close()
def main(): ss = Preferences() script1sourcedir = ss.getwpath(u"script1sourcedir")+u'' #("seeding\"), needs unicode u for file opening. files = [os.path.join(script1sourcedir,filename) for filename in next(os.walk(script1sourcedir))[2]] #gives absolute paths + names currentfile = 0 container = [] #set up an empty container for desired data to get put into for later for eachfile in files: metainfo = bencode.decode_from_file(eachfile) # #need to manually SHA1 hash the torrent file's info-dict to get the info-hash infodict = metainfo['info'] info_hash = hashlib.sha1(bencode.bencode(infodict)).hexdigest().upper() internalname = infodict['name'] torrentfilename = eachfile[eachfile.rfind("\\")+1:] locextension = torrentfilename.find(".torrent") #location of extension (char position) locid = torrentfilename.rfind("-")+1 #location of torrentID (char position) torrentid = torrentfilename[locid:locextension] #grab torrentID torrentfilename = torrentfilename[:locid-1] #####-------------replace banned characters with unicode section-----------------###### ### # Forward slashes are strange. "FullWidth" is very wide and would be too wide if theres already spaces around it. 
torrentfilename = torrentfilename.replace(" / ",u"/") # U+FFOF (wide) FULLWIDTH SOLIDUS # "Division" slash is too narrow and needs spaces inserted surrounding it (and is still less width than the fullwidth) torrentfilename = torrentfilename.replace("/",u" ∕ ") # U+2215 (narrow) DIVISION SLASH # Backslash (requires two slashes in python) torrentfilename = torrentfilename.replace("\\",u"\") # U+FF3C FULLWIDTH REVERSE SOLIDUS # Colon torrentfilename = torrentfilename.replace(":",u"꞉") # U+A789 MODIFIER LETTER COLON # asterisk torrentfilename = torrentfilename.replace("*",u"※") # U+203B REFERENCE MARK # question mark (replacement is backwards, sorry) torrentfilename = torrentfilename.replace("?",u"؟") # U+061F ARABIC QUESTION MARK # Double-quote torrentfilename = torrentfilename.replace('"',u"ʺ") # U+02BA MODIFIER LETTER DOUBLE PRIME # Left angle bracket torrentfilename = torrentfilename.replace("<",u"˂") # U+02C2 MODIFIER LETTER LEFT ARROWHEAD # right angle bracket torrentfilename = torrentfilename.replace(">",u"˃") # U+02C3 MODIFIER LETTER RIGHT ARROWHEAD # Pipe torrentfilename = torrentfilename.replace("|",u"ǀ") # U+01C0 LATIN LETTER DENTAL CLICK ### #####----------windows filename banned chars replacement with unicode-----------###### container.append([torrentfilename, internalname, info_hash, torrentid]) currentfile += 1 print currentfile, torrentfilename.encode('ascii', errors='ignore') print "\nReminder: Console output is ascii only, Cannot Print Unicode. (chars omitted)" ##File Output. The Master List file of everything.## # when the loop exits, Sort it, and write it to the file. container.sort() writelistfile = codecs.open(ss.getwpath("outpath3"), 'wb', "utf-8") # write-out a text file with one entry per line. main output file (3propernames.txt) for eachline in container: writelistfile.write(eachline[0] + " / " + eachline[2] + "\n") #torrentname / infohash writelistfile.close() print "Completed. 
Unicode File Written to: ", os.path.basename(ss.getwpath("outpath3"))
def _calc_terminal_scores(self, w):
    """ Calculate the score for each possible terminal/token match.

        w: a parse node; only w.start and w.end are read here and are
           treated as the token index range spanned by this tree
           (assumed half-open [start, end) — TODO confirm against
           _find_options).

        Returns a dict mapping token index -> {terminal: integer score},
        where higher scores indicate preferred terminal matches.
    """
    # First pass: for each token, find the possible terminals that
    # can correspond to that token
    finals = defaultdict(set)   # token index -> set of candidate terminals
    tokens = dict()             # token index -> token object
    self._find_options(w, finals, tokens)
    # Second pass: find a (partial) ordering by scoring the terminal
    # alternatives for each token
    scores = dict()
    noun_prefs = NounPreferences.DICT
    # Loop through the indices of the tokens spanned by this tree
    for i in range(w.start, w.end):
        s = finals[i]
        # Initially, each alternative has a score of 0
        scores[i] = {terminal: 0 for terminal in s}
        if len(s) <= 1:
            # No ambiguity to resolve here
            continue
        # More than one terminal in the option set for the token at index i
        # Calculate the relative scores
        # Find out whether the first part of all the terminals are the same
        same_first = len(set(terminal.first for terminal in s)) == 1
        txt = tokens[i].lower
        # Get the last part of a composite word
        # (e.g. 'jaðar-áhrifin' -> 'áhrifin')
        txt_last = txt.rsplit('-', maxsplit=1)[-1]
        # No need to check preferences if the first parts of all possible
        # terminals are equal
        # Look up the preference ordering from Reynir.conf, if any
        prefs = None if same_first else Preferences.get(txt_last)
        sc = scores[i]
        if prefs:
            # Apply the configured (worse, better, factor) preference rules:
            # for each worse/better terminal pair present in the option set,
            # accumulate the most extreme penalty/bonus seen per terminal
            adj_worse = defaultdict(int)
            adj_better = defaultdict(int)
            for worse, better, factor in prefs:
                for wt in s:
                    if wt.first in worse:
                        for bt in s:
                            if wt is not bt and bt.first in better:
                                if bt.name[0] in "\"'":
                                    # Literal terminal:
                                    # be even more aggressive in promoting it
                                    adj_w = -2 * factor
                                    adj_b = +6 * factor
                                else:
                                    adj_w = -2 * factor
                                    adj_b = +4 * factor
                                # Keep the strongest adjustment seen so far
                                adj_worse[wt] = min(adj_worse[wt], adj_w)
                                adj_better[bt] = max(adj_better[bt], adj_b)
            for wt, adj in adj_worse.items():
                sc[wt] += adj
            for bt, adj in adj_better.items():
                sc[bt] += adj
        # Apply heuristics to each terminal that potentially matches this token
        for t in s:
            tfirst = t.first
            if tfirst == "ao" or tfirst == "eo":
                # Subtract from the score of all ao and eo
                sc[t] -= 1
            elif tfirst == "no":
                if t.is_singular:
                    # Add to singular nouns relative to plural ones
                    sc[t] += 1
                elif t.is_abbrev:
                    # Punish abbreviations in favor of other more
                    # specific terminals
                    sc[t] -= 1
                if tokens[i].is_upper and tokens[i].is_word and tokens[i].t2:
                    # Punish connection of normal noun terminal to
                    # an uppercase word that can be a person or entity name
                    if any(m.fl in {"ism", "föð", "móð", "örn", "fyr"}
                           for m in tokens[i].t2):
                        # logging.info("Punishing connection of {0} with 'no' terminal".format(tokens[i].t1))
                        sc[t] -= 5
                # Noun priorities, i.e. between different genders
                # of the same word form
                # (for example "ára" which can refer to three stems
                # with different genders)
                if txt_last in noun_prefs:
                    np = noun_prefs[txt_last].get(t.gender, 0)
                    sc[t] += np
            elif tfirst == "fs":
                if t.has_variant("nf"):
                    # Reduce the weight of the 'artificial' nominative
                    # prepositions 'næstum', 'sem', 'um'
                    # Make other cases outweigh the Nl_nf bonus of +4
                    # (-2 -3 = -5)
                    sc[t] -= 8
                elif txt == "við" and t.has_variant("þgf"):
                    # Smaller bonus for við + þgf (is rarer than við + þf)
                    sc[t] += 1
                elif txt == "sem" and t.has_variant("þf"):
                    sc[t] -= 4
                elif txt == "á" and t.has_variant("þgf"):
                    # Larger bonus for á + þgf to resolve conflict with
                    # verb 'eiga'
                    sc[t] += 4
                else:
                    # Else, give a bonus for each matched preposition
                    sc[t] += 2
            elif tfirst == "so":
                if t.num_variants > 0 and t.variant(0) in "012":
                    # Consider verb arguments
                    # Normally, we give a bonus for verb arguments:
                    # the more matched, the better
                    numcases = int(t.variant(0))
                    adj = 2 * numcases
                    # !!! Logic should be added here to encourage zero
                    # arguments for verbs in 'miðmynd'
                    if numcases == 0:
                        # Zero arguments: we might not like this
                        vo0 = VerbObjects.VERBS[0]
                        if all((m.stofn not in vo0)
                               and (m.ordmynd not in vo0)
                               and ("MM" not in m.beyging)
                               for m in tokens[i].t2 if m.ordfl == "so"):
                            # No meaning where the verb has zero arguments
                            # print("Subtracting 5 points for 0-arg verb {0}".format(tokens[i].t1))
                            adj = -5
                    # Apply score adjustments for verbs with particular
                    # object cases, as specified by $score(n) pragmas in
                    # Verbs.conf
                    # In the (rare) cases where there are conflicting scores,
                    # apply the most positive adjustment
                    adjmax = 0
                    for m in tokens[i].t2:
                        if m.ordfl == "so":
                            key = m.stofn + t.verb_cases
                            score = VerbObjects.SCORES.get(key)
                            if score is not None:
                                adjmax = score
                                break
                    sc[t] += adj + adjmax
                if t.is_sagnb:
                    # We like sagnb and lh, it means that more
                    # than one piece clicks into place
                    sc[t] += 6
                elif t.is_lh:
                    # sagnb is preferred to lh, but vb (veik beyging)
                    # is discouraged
                    if t.has_variant("vb"):
                        sc[t] -= 2
                    else:
                        sc[t] += 3
                elif t.is_lh_nt:
                    # Encourage LHNT rather than LO
                    sc[t] += 12
                elif t.is_mm:
                    # Encourage mm forms. The encouragement should be better
                    # than the score for matching a single case, so we pick
                    # so_0_mm rather than so_1_þgf, for instance.
                    sc[t] += 3
                elif t.is_vh:
                    # Encourage vh forms
                    sc[t] += 2
                if t.is_subj:
                    # Give a small bonus for subject matches
                    if t.has_variant("none"):
                        # ... but a punishment for subj_none
                        sc[t] -= 3
                    else:
                        sc[t] += 1
                if t.is_nh:
                    if (i > 0) and any(pt.first == 'nhm'
                                       for pt in finals[i - 1]):
                        # Give a bonus for adjacent nhm + so_nh terminals
                        # Prop up the verb terminal with the nh variant
                        sc[t] += 4
                        for pt in scores[i - 1].keys():
                            if pt.first == 'nhm':
                                # Prop up the nhm terminal
                                scores[i - 1][pt] += 2
                                break
                    if any(pt.first == "no" and pt.has_variant("ef")
                           and pt.is_plural for pt in s):
                        # If this is a so_nh and an alternative no_ef_ft
                        # exists, choose this one (for example, 'hafa',
                        # 'vera', 'gera', 'fara', 'mynda', 'berja', 'borða')
                        sc[t] += 4
                    if (i > 0) and tokens[i].is_upper:
                        # The token is uppercase and not at the start of
                        # a sentence: discourage it from being a verb
                        sc[t] -= 4
            elif tfirst == "tala":
                if t.has_variant("ef"):
                    # Try to avoid interpreting plain numbers as possessives
                    sc[t] -= 4
            elif tfirst == "person":
                if t.has_variant("nf"):
                    # Prefer person names in the nominative case
                    sc[t] += 2
            elif tfirst == "sérnafn":
                if not tokens[i].t2:
                    # If there are no BÍN meanings, we had no choice but to
                    # use sérnafn, so alleviate some of the penalty given
                    # by the grammar
                    sc[t] += 4
                else:
                    # BÍN meanings are available: discourage this
                    # print(f"Discouraging sérnafn {txt}, BÍN meanings are {tokens[i].t2}")
                    sc[t] -= 10
                    if i == w.start:
                        # First token in sentence, and we have BÍN meanings:
                        # further discourage this
                        sc[t] -= 6
            elif tfirst == "fyrirtæki":
                # We encourage company names to be interpreted as such,
                # so we give company abbreviations ('hf.', 'Corp.', 'Limited')
                # a high priority
                sc[t] += 24
            elif tfirst == "st" or (tfirst == "sem" and t.colon_cat == "st"):
                if txt == "sem":
                    # Discourage "sem" as a pure conjunction (samtenging)
                    # (it does not get a penalty when occurring as
                    # a connective conjunction, 'stt')
                    sc[t] -= 6
            elif tfirst == "abfn":
                # If we have number and gender information with the reflexive
                # pronoun, that's good: encourage it
                sc[t] += 6 if t.num_variants > 1 else 2
            elif tfirst == "gr":
                # Encourage separate definite article rather than pronoun
                sc[t] += 2
            elif t.name[0] in "\"'":
                # Give a bonus for exact or semi-exact matches
                sc[t] += 1
    return scores
def main(): ss = Preferences() oldfile = open(ss.get("utresumedat"), "rb").read() newfile = open(os.path.join(ss.get("maindir"), u"NEWDAT.dat"), "wb") namesandhashfile = open(ss.getwpath("outpath3"), "rb").readlines() beforeafterpath = ss.getwpath( "outpath4" ) # this holds the intermediate changes to happen before actually renaming so you have a chance to edit/change it. (4beforepath-afterpath.txt) torrentlist = bencode.bdecode(oldfile) # These two things interfere with the processing on the next line fileguarduseless = torrentlist.pop(".fileguard", None) rec = torrentlist.pop("rec", None) # Remove this. # (dict. comprehension expects only dicts as the root keys) # create a reverse lookup dict with "Dict comprehension". nice and simple eh? ;-) reverselookup = { base64.b16encode(value["info"]): [key, value["caption"], value["path"]] for key, value in torrentlist.iteritems() } listofbeforeafter = [] # to modify paths in reverse lookup dict, start by getting the names and hash out of the namesandhashfile for eachline in namesandhashfile: nameandhash = eachline.strip().split( " / " ) # strip out the \n with strip() and split on the " / " i put there as a seperator. theNewname = nameandhash[0] thehash = nameandhash[1] # searches the dict's keys for a Hash, if exists. 
and if so, can be used as the [indexid] if thehash in reverselookup: key = reverselookup[thehash][0] theOldPath = torrentlist[key]["path"] theNewPath = os.path.join(os.path.dirname(theOldPath), theNewname) if theOldPath != theNewPath: listofbeforeafter.append( [theOldPath, theNewPath, thehash] ) # make a list of a list (stringtoOutputtoFile=[0], hash=[1]) # sort, then write file detailing changes to path (before / after) listofbeforeafter.sort() beforeafterfile = open(beforeafterpath, "wb") for eachline in listofbeforeafter: try: beforeafterfile.write( eachline[0] + " / " + eachline[2] + "\n" ) # write oldpath + hash on 1st line /The hash is duplicated for error checking in case the user accidentally bungles a character while editing... beforeafterfile.write(eachline[1] + " / " + eachline[2] + "\n") # write newpath + hash on 2nd line / except: print "Error writing the before+after file, probably a encoding/unicode error: \n", eachline[ 0 ], "\n", eachline[1] print "This was a fatal error and program could not continue." return beforeafterfile.close() # At this point the script pauses, and asks the user to confirm changes shown in the beforepath-afterpath.txt file raw_input( "Press Enter to begin Renaming files.......\\> " ) # wait for the user to press Enter before continuing with anything. # WRITE TORRENT RESUME.DAT beforeafterfile = open(beforeafterpath, "rb").readlines() for i in xrange(0, len(beforeafterfile), 2): beforeandhash = beforeafterfile[i].strip().split(" / ") afterandhash = beforeafterfile[i + 1].strip().split(" / ") before = beforeandhash[0].decode("utf-8") beforehash = beforeandhash[1] after = afterandhash[0].decode("utf-8") afterhash = afterandhash[1] if beforehash == afterhash: thehash = beforehash else: print "Error. You have inadvertently modified one of the hash files, and there is a hash mismatch between before/after entries." print "Cannot continue. Exiting. 
Please save your changes into a new file, locate your error, and re-run and fix it..." print "Another possibility is you were missing a / (with 1 character of whitespace on each side surrounding it) as a seperator." # searches the dict's keys for a Hash, if exists. and if so, can be used as the [indexid] if thehash in reverselookup: key = reverselookup[thehash][0] torrentlist[key]["caption"] = after[after.rfind("\\") + 1 :] try: # prints a number to console to show progress. corresponds to the numbers in the file (every-two-lines). (tip:) to show incremental numbers use (((i+1)/2)+1) # filenames printed to console, will be missing any unicode chars because the windows console is not unicode compatible!!!! (annoying) print i, before.encode("ascii", errors="ignore") print i + 1, after.encode("ascii", errors="ignore") os.rename(before, after) except Exception as e: traceback.print_exc() # will output any errors to console but keep going torrentlist[key]["path"] = after if after.endswith(".mp3") or after.endswith( ".flac" ): # .mp3 .flac = I personally didnt have any "Single file" .ogg, .aac, etc that needed special handling in this manner if torrentlist[key].has_key( "targets" ): # these lines are a quick fix, for an oversight in the uTorrent process. changing path is not enough torrentlist[key]["targets"][0][1] = after[ after.rfind("\\") + 1 : ] # single-file-mode torrents have a "targets" list that controls the filename torrentlist["rec"] = rec # add the thing we removed back in so we dont break anything (not sure what this is) # fileguard does not need to go back, in fact, purposefully needs to stay out. newfile.write(bencode.bencode(torrentlist)) newfile.close() print "Finished writing: ", newfile.name