def handle_args():
    parser = argparse.ArgumentParser(
        description="Parse a QLD Members' Interests PDF to a database.")
    parser.add_argument('input', help='the PDF file to parse')
    parser.add_argument('--dropall', action='store_true',
                        help='drop all tables before processing begins')
    return parser.parse_args()
def _parse_args():
    parser = argparse.ArgumentParser(
        description='Download privacy policies, optionally update the DB')
    parser.add_argument('input_path',
                        help='Path to file where policy urls are located.')
    parser.add_argument('output_dir',
                        help='Path to directory where policies will be saved. Creates directory '
                             'structure <outputdir>/<date>/<regiontag>/<domain>/<urlhash>/')
    parser.add_argument('--processes', '-p', default=multiprocessing.cpu_count(), type=int,
                        help='Number of processes to use')
    parser.add_argument('--check_previous', '-c', default=False, action='store_true',
                        help='Check downloaded policies against previous policies')
    parser.add_argument('--language', '-l', default='en-US, en',
                        help="Language string to set in Firefox's intl.accept_languages option. "
                             'Defaults to "en-US, en"')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Enable verbose logging')
    return parser.parse_args()
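# A minimal usage sketch of how the arguments parsed by _parse_args() might drive a
# worker pool sized by --processes. Not part of the original script: download_policy
# and read_policy_urls are hypothetical helpers assumed here for illustration.
import functools
import multiprocessing


def run_downloads(args, download_policy, read_policy_urls):
    """Fan the policy URLs out over a multiprocessing pool."""
    urls = read_policy_urls(args.input_path)
    worker = functools.partial(download_policy,
                               output_dir=args.output_dir,
                               language=args.language,
                               check_previous=args.check_previous)
    with multiprocessing.Pool(processes=args.processes) as pool:
        pool.map(worker, urls)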
def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-f', '--file', help='input pdf',
        default='/Users/Dhruv/Downloads/Sample roll call vote PDF_multiple columns[2].pdf')
    return parser.parse_args()
def main():
    """Main function."""
    parser = _build_parser()
    options = parser.parse_args()
    filepath = options.filepath
    language = options.language
    outpath = options.outpath

    print('Getting tokens...')
    tokens = pdf_tokens(filepath, language)

    print(f'Writing to {outpath}')
    with open(outpath, 'w') as outfile:
        for item in tokens:
            outfile.write(f'{item}\n')
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input_file", required=True,
                        help="complete location of the input pdf file")
    parser.add_argument("-d", "--destination_file", required=False,
                        help="complete location where output csv file will be created")
    args = parser.parse_args()

    input_file = None
    output_file_location = None
    if args.input_file:
        input_file = args.input_file
    if args.destination_file:
        output_file_location = args.destination_file
    return input_file, output_file_location
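# Usage sketch, not from the original source: convert_pdf_to_csv is a hypothetical
# converter. The point is how the (input_file, output_file_location) tuple returned
# by parse_args() might be consumed, deriving a default CSV path next to the input
# when --destination_file is omitted.
from pathlib import Path


def main():
    input_file, output_file_location = parse_args()
    if output_file_location is None:
        output_file_location = str(Path(input_file).with_suffix('.csv'))
    convert_pdf_to_csv(input_file, output_file_location)  # hypothetical helper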
def main():
    import argparse
    parser = argparse.ArgumentParser(
        description="""Extract and save text and metadata from files.""")
    parser.add_argument("dirin", help="Directory containing the original files.")
    parser.add_argument("dirout", help="Directory in which to store the extracted text.")
    parser.add_argument(
        "--recursivo", default=False, action="store_true",
        help="Also visit subdirectories. (%(default)s) E.g.: --recursivo",
    )
    parser.add_argument(
        "--exts", action="append", required=False,
        help="Extract only from these file types. E.g.: --exts pdf --exts docx",
    )
    parser.add_argument(
        "--basura", action="append",
        help="Remove these characters. E.g.: --basura '<>!#' --basura � ",
    )
    parser.add_argument(
        "--chars", default=0, type=int,
        help="Discard text with fewer characters than this. (%(default)s). E.g.: --chars 10",
    )
    args = parser.parse_args()

    dirin = args.dirin
    dirout = Path(args.dirout).resolve()
    recursivo = args.recursivo
    exts = args.exts
    basura = args.basura
    chars = args.chars

    n = extraer_todos(dirin, dirout, recursivo=recursivo, exts=exts,
                      basura=basura, chars=chars)
    print(f"{n} new files saved in folder {str(dirout)}")
def main(): """Run tikatree from command line""" global MASK start_time = time() parser = initArgparse() args = parser.parse_args() dirtree = args.directorytree filetree = args.filetree meta = args.metadata newmeta = args.newmetadata sfv = args.sfv yes = args.yes MASK = args.exclude for i in args.DIRECTORY: if Path(i).exists() is True: basepath = Path(i) else: raise NotADirectoryError(f"{i} does not exist") default = False if dirtree == sfv == filetree == meta == newmeta is False: default = True if dirtree is True or default is True: dirtree_file = f"{basepath.name}_directory_tree.txt" checkFileExists(basepath, dirtree_file, yes) createDirectoryTree(basepath, dirtree_file) if sfv is True or default is True: sfv_file = f"{basepath.name}.sfv" checkFileExists(basepath, sfv_file, yes) createSfv(basepath, sfv_file) if filetree is True or default is True: csvtree_file = f"{basepath.name}_file_tree.csv" jsontree_file = f"{basepath.name}_file_tree.json" checkFileExists(basepath, jsontree_file, yes) checkFileExists(basepath, csvtree_file, yes) createFileTree(basepath, jsontree_file, csvtree_file) if meta is True or default is True: metadata_file = f"{basepath.name}_metadata.json" checkFileExists(basepath, metadata_file, yes) createMetadata(basepath, metadata_file) if newmeta is True: createNewMetadata(basepath) filesCache.cache_clear() getFileInfo.cache_clear() killTika() stop_time = time() print(f"Finished in {round(stop_time-start_time, 2)} seconds")
        if state is None:
            state = get_random_state(markov_chain)
        text.append(state.split()[-1])
    return ' '.join(text)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Markov Chain Text Generator')
    parser.add_argument('-f', '--file', required=True,
                        help='Name of file to read text from.')
    parser.add_argument('-o', '--order', default=1, type=int,
                        help='Number of past states each state depends on.')
    parser.add_argument('-w', '--words', default=100, type=int,
                        help='Number of words to generate.')
    pargs = parser.parse_args()

    tokens = tokenise_text_file(pargs.file)
    markov_chain = create_markov_chain(tokens, order=pargs.order)
    # Generate the text once, then both print it and write it to disk,
    # so the saved output matches what was printed.
    generated = generate_text(markov_chain, pargs.words)
    print(generated)
    with open("outputv4.txt", "w") as outfile:
        outfile.write(generated)
    return propsForCreate


print tika_obo.doCopyAndTagUp(docLocalPath, folder.id)

################################################################
# main entry point HERE
usage = "usage: %prog -s sourcePathToCopy -t targetPathOnRepository -f fileFilter(default=*.*)"
parser = OptionParser(usage=usage)

## get the values for source and target from the command line
parser.add_option("-s", "--source", action="store", type="string", dest="source",
                  help="Top level of local source directory tree to copy")
parser.add_option("-t", "--target", action="store", type="string", dest="target",
                  help="path to (existing) target CMIS folder. All children will be created during copy.")
parser.add_option("-f", "--filter", action="store", type="string", dest="filter", default="*.*",
                  help="File filter. e.g. *.jpg or *.*")
(options, args) = parser.parse_args()

startingSourceFolderForCopy = options.source
targetCmisFolderStartingPath = options.target

# read in the config values
config = ConfigParser.RawConfigParser()
config.read(configFileName)
try:
    UrlCmisService = config.get(cmisConfigSectionName, "serviceURL")
    targetClassName = config.get(cmisConfigSectionName, "targetClassName")
    user_id = config.get(cmisConfigSectionName, "user_id")
    password = config.get(cmisConfigSectionName, "password")
    debugMode = config.get(cmisConfigSectionName, "debug")
except:
    print "There was a problem finding the config file: " + configFileName + \
          " or one of the settings in the [" + cmisConfigSectionName + "] section."
    sys.exit()
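# For reference, the config.get() calls above expect an INI-style file (whose path is
# held in configFileName) containing a section named by cmisConfigSectionName with the
# five keys read in the try block. The section name and values below are placeholders,
# not taken from the original source:
#
#   [cmis]
#   serviceURL = http://example.com/cmis/atom
#   targetClassName = cmis:document
#   user_id = admin
#   password = secret
#   debug = False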