def getScore(self):
    lt = ListTool()
    for data in self.xmlInsp:
        pl = PageLoader(data.fil)
        if not pl.isReadable():
            print('Abort. XMLInspections data corrupted. File not readable:', data.fil)
            return False
        pl.read()
        # Collect the keywords that also occur in each word category of the page.
        lWords = lt.getNonUniques(self.keyWords, pl.linkWords)
        tWords = lt.getNonUniques(self.keyWords, pl.titleWords)
        hWords = lt.getNonUniques(self.keyWords, pl.headerWords)
        sWords = lt.getNonUniques(self.keyWords, pl.specialWords)
        nWords = lt.getNonUniques(self.keyWords, pl.normalWords)
        # Weight each category by its multiplier and accumulate the page score.
        score = len(lWords) * self.lMulti
        score += len(tWords) * self.tMulti
        score += len(hWords) * self.hMulti
        score += len(sWords) * self.sMulti
        score += len(nWords) * self.nMulti
        data.score = score
    return self.xmlInsp
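
# ListTool is defined elsewhere; this is only a minimal sketch of the
# getNonUniques() behaviour getScore() relies on (a hypothetical
# implementation, assuming it returns the page words that also appear in the
# keyword list, keeping duplicates so repeated matches raise the score):
class ListTool:
    def getNonUniques(self, keyWords, pageWords):
        keySet = set(keyWords)
        # Each occurrence in pageWords counts once, hence "non-unique".
        return [word for word in pageWords if word in keySet]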
def storeWords(self):
    self.wl = WordList()
    xReader = XMLReader()
    xParser = XMLParser()
    # Load the previously stored word list, if there is one.
    if xReader.checkIfExistsQuiet('xml/words.xml'):
        tree = xReader.getTree('xml/words.xml')
        wordAvg, avgRatio = xParser.getGeneralFromWords(tree)
        self.wl = xParser.getWords(tree)
    # An up-vote sets the first counter, any other vote the second one.
    usf = 0
    usl = 0
    if self.vote == "up":
        usf = 1
    else:
        usl = 1
    for ind, obj in enumerate(self.XMLInspections):
        if obj.ID != self.voteId:
            continue
        pl = PageLoader(obj.fil)
        if not pl.isReadable():
            print('Abort. File not readable:', obj.fil)
            exit()
        pl.read()
        # Keep only purely alphanumeric words in every category.
        patt = "^[a-zA-Z0-9]*$"
        pl.linkWords = self.removeListElesNotPatterned(patt, pl.linkWords)
        pl.titleWords = self.removeListElesNotPatterned(patt, pl.titleWords)
        pl.headerWords = self.removeListElesNotPatterned(patt, pl.headerWords)
        pl.specialWords = self.removeListElesNotPatterned(patt, pl.specialWords)
        pl.normalWords = self.removeListElesNotPatterned(patt, pl.normalWords)
        # Append every surviving word together with the vote counters.
        for words in (pl.linkWords, pl.titleWords, pl.headerWords,
                      pl.specialWords, pl.normalWords):
            for word in words:
                self.wl.append(word, usf, usl)
    return
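
# removeListElesNotPatterned() is defined elsewhere in the class; a minimal
# sketch of the contract storeWords() assumes (hypothetical implementation,
# written here as a free function): drop every element that does not fully
# match the given regular expression.
import re

def removeListElesNotPatterned(patt, words):
    # re.match() anchors at the start of the string and the pattern itself
    # ends with "$", so only purely alphanumeric words survive.
    return [word for word in words if re.match(patt, word)]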
# settings
input_file_name = 'read.html'
cache_dir_name = 'cache'
out_file_name = 'out.csv'
min_delay = 90
max_delay = 120

print('Load books from file: "%s"' % input_file_name)
read_parser = ReadParser()
if read_parser.load_from_file(input_file_name) is False:
    exit(1)
print('Books loaded.')

print('Parse books from summary.')
books = read_parser.parse_books()
print('Books parsed: %s.' % len(books))

print('Start downloading detailed book pages.')
cache = CacheManager(cache_dir_name)
loader = PageLoader(cache, min_delay, max_delay)
loader.download(books)
print('Detailed book pages downloaded.')

print('Prepare books for export.')
details_parser = DetailsParser(cache)
ready_books = details_parser.parse(books)
print('Books ready to export: %s.' % len(ready_books))

writer = CsvWriter()
writer.save(ready_books, out_file_name)
print('Books saved to "%s".' % out_file_name)
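
# PageLoader and CacheManager are imported from elsewhere; this is a minimal
# sketch of the throttled download loop that the min_delay/max_delay settings
# suggest. The cache API (has/put) and the book.url attribute are assumptions
# made for the sketch, not the actual interface:
import random
import time
import requests

class PageLoader:
    def __init__(self, cache, min_delay, max_delay):
        self.cache = cache
        self.min_delay = min_delay
        self.max_delay = max_delay

    def download(self, books):
        for book in books:
            if self.cache.has(book.url):
                continue  # already downloaded on a previous run
            self.cache.put(book.url, requests.get(book.url).text)
            # Sleep a random interval (in seconds) between requests so the
            # scraper does not hammer the site.
            time.sleep(random.uniform(self.min_delay, self.max_delay))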
def main():
    loglevel = {
        "critical": logging.CRITICAL,
        "error": logging.ERROR,
        "warning": logging.WARNING,
        "info": logging.INFO,
        "debug": logging.DEBUG,
    }
    home_dir_url = unicode(os.getenv("HOME"))
    default_conf_dir = os.path.join(home_dir_url, u".castnamer")
    default_cache_dir = os.path.join(home_dir_url, u".castnamer", u"cache")

    # TODO: option to create links instead of renaming
    # TODO: option to filter only on directories, i.e. anything given
    #       explicitly with -f is always treated whatever its extension is
    # TODO: option to update the cache

    parser = argparse.ArgumentParser(
        description='castnamer is a program to help rename all your media files.')
    parser.add_argument(
        '-f', '--files', dest="files2", nargs="+", default=[], metavar="FILES",
        help="you can also select the files you want to treat with this option")
    parser.add_argument(
        'files', nargs="*", default=[],
        help="treat these media files; without the --all option, all files "
             "that do not have a media extension are excluded")
    parser.add_argument(
        '-d', '--dirs', nargs="+", default=[],
        help='treat all the media files in these directories; with the --all '
             'option ALL the files will be treated')
    parser.add_argument('-r', '--dry-run', action='store_true',
                        help='do not rename the files')
    parser.add_argument(
        '-s', '--script', metavar="SCRIPT_URL", action='store', nargs="?",
        default=False, const=True,
        help="force the generation of the do/undo script (during a dry run, "
             "for example); you can optionally give a url for the script; if "
             "the current call is not a dry run, a do/undo script will always "
             "be generated in " + default_conf_dir)
    parser.add_argument(
        '-t', '--title',
        help='force the title of the parent media (useful for tv series)')
    parser.add_argument(
        '-a', '--all', action='store_true',
        help='if set, ALL the files in the directories given with the --dirs '
             'option will be treated; otherwise, ONLY the files with media '
             'extensions will be treated')
    parser.add_argument('-i', '--ignore-well-named', action='store_true',
                        help='do not treat files that look well named')
    parser.add_argument('-c', '--clear-cache', action='store_true',
                        help='clear the cache before running')
    parser.add_argument(
        '-l', '--force-load', action='store_true',
        help="force the loading of every request; the cache will be neither "
             "updated nor cleared")
    parser.add_argument(
        '-u', '--undo', action='store_true',
        help="undo the changes of the last run of this program by running the "
             "most recent undo script in " + default_conf_dir)
    parser.add_argument(
        '--conf-dir', default=default_conf_dir,
        help="set the conf directory, by default it's %(default)s")
    parser.add_argument(
        '--cache-dir', default=default_cache_dir,
        help="set the cache directory, by default it's %(default)s")
    group_verbosity = parser.add_mutually_exclusive_group()
    group_verbosity.add_argument(
        '-v', '--verbosity', metavar="LEVEL", action='store', nargs="?",
        choices=loglevel.keys(), const="debug", default="warning",
        help='set the verbosity level; it is "warning" by default and "debug" '
             'when the flag is set without a parameter')
    group_verbosity.add_argument('-q', '--quiet', action='store_true',
                                 help='do not display anything')

    # Read the args
    args = parser.parse_args()
    args.title = unicode(args.title) if args.title else None
    args.cache_dir = unicode(args.cache_dir)
    args.conf_dir = unicode(args.conf_dir)
    args.files = [f.decode("utf8") for f in args.files]
    args.files2 = [f.decode("utf8") for f in args.files2]

    # Undo mode: run the most recent undo script and stop.
    if args.undo:
        script_urls = listdir_by_date(args.conf_dir, u"undo")
        if script_urls != []:
            last_undo_script_url = script_urls[0][1]
            os.system('bash "'
                      + os.path.join(args.conf_dir, last_undo_script_url)
                      + '"')
        exit()

    # Creation of directories
    if not os.path.exists(args.conf_dir):
        os.mkdir(args.conf_dir)
    if not os.path.exists(args.cache_dir):
        os.mkdir(args.cache_dir)

    # Logs
    if args.quiet:
        setuplogging(os.path.join(args.conf_dir, "castnamer.log"),
                     loglevel["debug"], False)
    else:
        setuplogging(os.path.join(args.conf_dir, "castnamer.log"),
                     loglevel[args.verbosity], True)
    logging.debug("args = " + str(args).replace(", ", "\n")
                  .replace("(", "\n").replace(")", "\n"))

    # Cache
    if args.clear_cache:
        shutil.rmtree(args.cache_dir)
        os.mkdir(args.cache_dir)

    # Make the page loader; --force-load bypasses the cache entirely.
    if args.force_load:
        loader = PageLoader()
    else:
        loader = PageLoader(args.cache_dir)

    # Make the list of files to treat.
    files = []
    for d in args.dirs:
        for f in os.listdir(unicode(d)):
            if args.all or has_media_extension(f):
                files.append(os.path.join(d, f))
    for f in args.files + args.files2:
        if args.all or has_media_extension(f):
            files.append(f)

    batch_renamer = BatchRenamer()
    imdbbrowser = IMDBBrowser(loader)
    media_files = MediaFile.read_list(files)
    for mf in media_files:
        mf.make_media(imdbbrowser, args.title)
        if mf.media is not None:
            if mf.file_name != mf.get_normalized_name():
                batch_renamer.append(mf.url, mf.get_normalized_name())

    default_script_storage_url = os.path.join(
        args.conf_dir, "undo_" + time.strftime("%d-%b-%Y_%H:%M:%S") + ".sh")
    if args.dry_run:
        print("\n".join(batch_renamer.get_do_commands()))
        if args.script == True:
            batch_renamer.create_undo_script(default_script_storage_url)
        elif type(args.script) == str:
            batch_renamer.create_undo_script(args.script)
    else:
        logging.debug("\n".join(batch_renamer.get_do_commands()))
        # batch_renamer.create_undo_script(default_script_storage_url)
        if type(args.script) == str:
            batch_renamer.create_undo_script(args.script)
        batch_renamer.do()
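
# listdir_by_date() is a helper defined elsewhere; a minimal sketch of the
# contract main() relies on (hypothetical implementation): list the files in
# a directory whose names start with the given prefix as (mtime, name) pairs,
# newest first, so that script_urls[0][1] is the most recent undo script.
import os

def listdir_by_date(dir_url, prefix):
    entries = []
    for name in os.listdir(dir_url):
        if name.startswith(prefix):
            mtime = os.path.getmtime(os.path.join(dir_url, name))
            entries.append((mtime, name))
    # Sort by modification time, most recent first.
    return sorted(entries, reverse=True)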