Ejemplo n.º 1
0
def handle_args():
    """Parse the command line for the PDF-to-database tool.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``, carrying
        ``input`` (the positional PDF path) and ``dropall`` (``True`` only
        when ``--dropall`` was supplied).
    """
    cli = argparse.ArgumentParser(
        description="Parse a QLD Members' Interests PDF to a database.",
    )
    cli.add_argument('input', help='the PDF file to parse')
    cli.add_argument(
        '--dropall',
        help='drop all tables before processing begins',
        action='store_true',
    )
    namespace = cli.parse_args()
    return namespace
Ejemplo n.º 2
0
def _parse_args():
    """Parse the command line for the privacy-policy downloader.

    Positional arguments are ``input_path`` and ``output_dir``. Optional
    arguments are ``--processes/-p`` (int, defaults to the machine's CPU
    count), ``--check_previous/-c`` (flag), ``--language/-l`` (string) and
    ``--verbose/-v`` (flag).

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``.
    """
    parser = argparse.ArgumentParser(
        description='Download privacy policies, optionally update the DB')
    parser.add_argument(
        'input_path',
        help='Path to file where policy urls are located.')
    parser.add_argument(
        'output_dir',
        help='Path to directory where policies will be saved. Creates directory structure <outputdir>/<date>/<regiontag>/<domain>/<urlhash>/')
    parser.add_argument(
        '--processes', '-p',
        default=multiprocessing.cpu_count(),
        type=int,
        help='Number of processes to use')
    parser.add_argument(
        '--check_previous', '-c',
        default=False,
        action='store_true',
        help='Boolean indicating whether to check against previous policies')
    # The help text previously hard-coded "en_US, en", which did not match
    # the actual default ('en-US, en'). Interpolating %(default)s keeps the
    # two in sync.
    parser.add_argument(
        '--language', '-l',
        default='en-US, en',
        help='Language string to set in Firefox\'s intl.accept_languages option. Defaults to "%(default)s"')
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Enable verbose logging')
    return parser.parse_args()
Ejemplo n.º 3
0
def get_args():
    """Parse the command line and return the resulting namespace.

    The only option is ``-f``/``--file`` (the input PDF). When it is
    omitted, the hard-coded sample path below is used as the default.
    """
    fallback_pdf = '/Users/Dhruv/Downloads/Sample roll call vote PDF_multiple columns[2].pdf'

    cli = argparse.ArgumentParser()
    cli.add_argument('-f', '--file', default=fallback_pdf, help='input pdf')
    return cli.parse_args()
Ejemplo n.º 4
0
def main():
    """Read CLI options, obtain tokens via ``pdf_tokens`` and write them out.

    The parser comes from ``_build_parser()``; its ``filepath`` and
    ``language`` options are passed to ``pdf_tokens``. Every item that call
    yields is written to the ``outpath`` file, each followed by a newline.
    """
    opts = _build_parser().parse_args()
    source, lang, destination = opts.filepath, opts.language, opts.outpath

    print('Getting tokens...')
    tokens = pdf_tokens(source, lang)

    print(f'Writing to {destination}')
    with open(destination, 'w') as sink:
        sink.writelines(f'{token}\n' for token in tokens)
Ejemplo n.º 5
0
def parse_args():
    """Parse the command line and return the input and output locations.

    Returns:
        A ``(input_file, output_file_location)`` tuple. ``-i/--input_file``
        is required; ``-d/--destination_file`` is optional. Any value that
        is falsy (option omitted, or given as an empty string) is reported
        as ``None``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "-i", "--input_file",
        required=True,
        help="complete location of the input pdf file",
    )
    cli.add_argument(
        "-d", "--destination_file",
        required=False,
        help="complete location where output csv file will be created",
    )
    parsed = cli.parse_args()

    # Falsy values collapse to None, truthy ones pass through unchanged.
    return parsed.input_file or None, parsed.destination_file or None
Ejemplo n.º 6
0
def main():
    """Command-line entry point: parse the options and call ``extraer_todos``.

    Positional arguments ``dirin`` and ``dirout`` name the source and output
    directories; ``dirout`` is resolved to an absolute path before use.
    ``--recursivo`` is a flag, ``--exts`` and ``--basura`` may be repeated
    (each occurrence is appended to a list, ``None`` when never given), and
    ``--chars`` is an integer defaulting to 0. The value returned by
    ``extraer_todos`` is printed as the number of newly saved files.
    """
    import argparse

    cli = argparse.ArgumentParser(
        description="""Extrae y guarda texto y metadata de archivos.""",
    )

    # Positional arguments: source and destination directories.
    cli.add_argument("dirin", help="Directorio de archivos originales.")
    cli.add_argument("dirout", help="Directorio para almacenar texto extraido.")

    # Optional switches and filters.
    cli.add_argument(
        "--recursivo",
        action="store_true",
        default=False,
        help="Visitar subdirectorios si se incluye. (%(default)s) Ej: --recursivo",
    )
    cli.add_argument(
        "--exts",
        required=False,
        action="append",
        help="Extraer solo de este tipo de archivo. Ej: --exts pdf --exts docx",
    )
    cli.add_argument(
        "--basura",
        action="append",
        help="Eliminar estos caracteres. Ej: --basura '<>!#' --basura � ",
    )
    cli.add_argument(
        "--chars",
        type=int,
        default=0,
        help="Eliminar texto con pocos caracteres. (%(default)s). Ej: --chars 10",
    )

    opts = cli.parse_args()
    destino = Path(opts.dirout).resolve()

    guardados = extraer_todos(
        opts.dirin,
        destino,
        recursivo=opts.recursivo,
        exts=opts.exts,
        basura=opts.basura,
        chars=opts.chars,
    )
    print(f"{guardados} nuevos archivos guardados en carpeta {destino!s}")
Ejemplo n.º 7
0
def main():
    """Run tikatree from the command line.

    Parses the options from ``initArgparse()``, stores ``args.exclude`` in
    the module-level ``MASK``, and then processes every path in
    ``args.DIRECTORY`` in turn. For each path the selected reports are
    produced. When none of the report flags is set, the directory tree, SFV,
    file tree and metadata reports are all produced (new metadata is only
    ever produced on explicit request). The elapsed time is printed at the
    end.

    Raises:
        NotADirectoryError: if one of the given paths does not exist.
    """
    global MASK
    began = time()
    args = initArgparse().parse_args()
    dirtree = args.directorytree
    filetree = args.filetree
    meta = args.metadata
    newmeta = args.newmetadata
    sfv = args.sfv
    yes = args.yes
    MASK = args.exclude

    # True only when every report flag equals False: nothing was selected,
    # so fall back to the default set of reports.
    run_defaults = bool(dirtree == sfv == filetree == meta == newmeta is False)

    for entry in args.DIRECTORY:
        basepath = Path(entry)
        if not basepath.exists():
            raise NotADirectoryError(f"{entry} does not exist")

        if dirtree is True or run_defaults:
            tree_txt = f"{basepath.name}_directory_tree.txt"
            checkFileExists(basepath, tree_txt, yes)
            createDirectoryTree(basepath, tree_txt)

        if sfv is True or run_defaults:
            sfv_name = f"{basepath.name}.sfv"
            checkFileExists(basepath, sfv_name, yes)
            createSfv(basepath, sfv_name)

        if filetree is True or run_defaults:
            csv_name = f"{basepath.name}_file_tree.csv"
            json_name = f"{basepath.name}_file_tree.json"
            checkFileExists(basepath, json_name, yes)
            checkFileExists(basepath, csv_name, yes)
            createFileTree(basepath, json_name, csv_name)

        if meta is True or run_defaults:
            meta_name = f"{basepath.name}_metadata.json"
            checkFileExists(basepath, meta_name, yes)
            createMetadata(basepath, meta_name)

        if newmeta is True:
            createNewMetadata(basepath)

        # Clear the caches and call killTika() after each path, before the
        # next one is processed.
        filesCache.cache_clear()
        getFileInfo.cache_clear()
        killTika()

    elapsed = round(time() - began, 2)
    print(f"Finished in {elapsed} seconds")
Ejemplo n.º 8
0
        if state is None:
            state = get_random_state(markov_chain)
        text.append(state.split()[-1])
    return ' '.join(text)


if __name__ == '__main__':
    # Script entry point: read a text file, build a Markov chain from its
    # tokens, print one generated text and write another to "outputv4.txt".
    parser = argparse.ArgumentParser(description='Markov Chain Text Generator')
    parser.add_argument('-f',
                        '--file',
                        required=True,
                        help='Name of file to read text from.')
    parser.add_argument('-o',
                        '--order',
                        default=1,
                        type=int,
                        help='Number of past states each state depends on.')
    parser.add_argument('-w',
                        '--words',
                        default=100,
                        type=int,
                        help='Number of words to generate.')
    pargs = parser.parse_args()

    tokens = tokenise_text_file(pargs.file)
    markov_chain = create_markov_chain(tokens, order=pargs.order)
    print(generate_text(markov_chain, pargs.words))
    # The context manager guarantees the file is closed even if generation
    # or the write raises; the previous manual open()/close() pair leaked
    # the handle on error.
    # NOTE(review): generate_text is called a second time here, so the saved
    # text is presumably a different sample from the one printed above.
    # Confirm whether that is intended.
    with open("outputv4.txt", "w") as out_file:
        out_file.write(generate_text(markov_chain, pargs.words))
Ejemplo n.º 9
0
        return propsForCreate


    print tika_obo.doCopyAndTagUp(docLocalPath,folder.id)

################################################################
# main entry point HERE
usage = "usage: %prog -s sourcePathToCopy -t targetPathOnRepository -f fileFilter(default=*.*)"
parser = OptionParser(usage=usage)

## get the values for source and target from the command line
parser.add_option("-s", "--source", action="store", type="string", dest="source", help="Top level of local source directory tree to copy")
parser.add_option("-t", "--target", action="store", type="string", dest="target", help="path to (existing) target CMIS folder. All children will be created during copy.")
parser.add_option("-f", "--filter", action="store", type="string", dest="filter", default="*.*", help="File filter. e.g. *.jpg or *.* ")

(options, args) = parser.parse_args()
startingSourceFolderForCopy = options.source
targetCmisFolderStartingPath = options.target
  
# read in the config values
config = ConfigParser.RawConfigParser()
config.read(configFileName)
try:
    UrlCmisService = config.get(cmisConfigSectionName, "serviceURL")
    targetClassName = config.get(cmisConfigSectionName, "targetClassName")
    user_id = config.get(cmisConfigSectionName, "user_id")
    password = config.get(cmisConfigSectionName, "password")
    debugMode = config.get(cmisConfigSectionName, "debug")
except:
    print "There was a problem finding the the config file:" + configFileName + " or one of the settings in the [" + cmisConfigSectionName + "] section ."
    sys.exit()