Example #1
def main(unparsed_args_list):
    """Uses parsed_args to run the entirety of the pham align pipeline.

    :param unparsed_args_list: Input a list of command line args.
    :type unparsed_args_list: list[str]
    """
    args = parse_pham_align(unparsed_args_list)

    config = configfile.build_complete_config(args.config_file)

    alchemist = pipelines_basic.build_alchemist(args.database, config=config)

    values = pipelines_basic.parse_value_input(args.input)

    execute_pham_align(alchemist,
                       folder_path=args.folder_path,
                       folder_name=args.folder_name,
                       values=values,
                       filters=args.filters,
                       groups=args.groups,
                       file_type=args.file_type,
                       mat_out=args.distmat_out,
                       tree_out=args.guidetree_out,
                       verbose=args.verbose,
                       dump=args.dump,
                       force=args.force,
                       threads=args.number_threads)
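
Every example in this section passes a config file path (or None) to configfile.build_complete_config. As a minimal sketch, assuming the section and key names visible in these examples (mysql, ncbi, download_server, upload_server) and entirely hypothetical values, a compatible config file could be produced and consumed like this:

import pathlib
import tempfile

# Hypothetical config contents; section/key names are taken from the
# examples in this section, the values are placeholders.
ini_text = """\
[mysql]
user = pdm_anon
password = pdm_anon

[ncbi]
tool = pdm_utils
email = user@example.com

[download_server]
url = http://databases.example.org/

[upload_server]
host = sftp.example.org
dest = /data/incoming
user = uploader
password = secret
"""

config_path = pathlib.Path(tempfile.gettempdir(), "pdm_utils.ini")
config_path.write_text(ini_text)
# config = configfile.build_complete_config(config_path)  # as in Example #1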
Example #2
def main(unparsed_args_list):
    """
    Run the get_db pipeline.

    The database data can be retrieved from three places:
    The server, which needs to be downloaded to a new folder.
    A local file, in which no download and no new folder are needed.
    The empty schema stored within pdm_utils, in which no download, new folder,
    or local file are needed.

    :param unparsed_args_list: list of arguments to run the pipeline unparsed
    :type unparsed_args_list: list

    """
    args = parse_args(unparsed_args_list)

    # Set values that are shared between all three options.
    config = configfile.build_complete_config(args.config_file)
    alchemist = pipelines_basic.build_alchemist(None,
                                                config=config,
                                                ask_database=False)

    if args.option == "file":
        execute_get_file_db(alchemist,
                            args.database,
                            args.filename,
                            config_file=args.config_file,
                            schema_version=args.schema_version,
                            verbose=args.verbose)
    elif args.option == "new":
        execute_get_new_db(alchemist,
                           args.database,
                           args.schema_version,
                           config_file=args.config_file,
                           verbose=args.verbose)
    else:
        url = args.url
        if url is None:
            server_creds = config["download_server"]
            url = server_creds.get("url")

        if url is None:
            url = DEFAULT_SETTINGS["url"]

        execute_get_server_db(alchemist,
                              args.database,
                              url,
                              folder_path=args.output_folder,
                              db_name=args.db_name,
                              config_file=args.config_file,
                              verbose=args.verbose,
                              subdirectory=args.remote_directory,
                              download_only=args.download_only,
                              get_version=args.get_version,
                              force_pull=args.force_pull,
                              schema_version=args.schema_version)
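
The server branch above resolves the URL through a three-level fallback: the command-line value, then the config file, then DEFAULT_SETTINGS. A generic sketch of that pattern (resolve_url is a hypothetical helper; the pipeline inlines this logic):

def resolve_url(cli_url, config, default_settings):
    # 1. A command-line value wins if one was given.
    if cli_url is not None:
        return cli_url
    # 2. Fall back to the config file's download_server section.
    url = config["download_server"].get("url")
    if url is not None:
        return url
    # 3. Finally fall back to the package default.
    return default_settings["url"]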
Example #3
def test_build_complete_config_2(self):
    """Confirm that ConfigParser is constructed with default None
    using no file."""
    parser2 = configfile.build_complete_config(None)
    with self.subTest():
        self.assertIsNone(parser2["mysql"]["user"])
    with self.subTest():
        self.assertIsNone(parser2["ncbi"]["tool"])
    with self.subTest():
        self.assertIsNone(parser2["ncbi"]["email"])
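This test implies that build_complete_config(None) still yields a parser whose expected keys read back as None. A standalone sketch of that behavior, assuming it is built on configparser with allow_no_value=True:

import configparser

parser = configparser.ConfigParser(allow_no_value=True)
# Seed every expected key with None, mimicking the default config.
parser.read_dict({"mysql": {"user": None, "password": None},
                  "ncbi": {"tool": None, "email": None}})
assert parser["mysql"]["user"] is None
assert parser["ncbi"]["tool"] is None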
Example #4
def main(unparsed_args):
    """Runs the complete update pipeline."""
    args = parse_args(unparsed_args[2:])

    # Verify database connection and schema compatibility.
    print("Connecting to the MySQL database...")

    # Create config object with data obtained from file and/or defaults.
    config = configfile.build_complete_config(args.config_file)
    mysql_creds = config["mysql"]
    alchemist = AlchemyHandler(database=args.database,
                               username=mysql_creds["user"],
                               password=mysql_creds["password"])
    alchemist.connect(pipeline=True)
    engine = alchemist.engine
    mysqldb.check_schema_compatibility(engine, "the update pipeline")

    if args.version:
        mysqldb.change_version(engine)
        print("Database version updated.")

    if args.ticket_table is not None:
        update_table_path = basic.set_path(args.ticket_table,
                                           kind="file",
                                           expect=True)

        # Iterate through the tickets and process them sequentially.
        list_of_update_tickets = []
        with update_table_path.open(mode='r') as f:
            file_reader = csv.DictReader(f)
            for ticket in file_reader:
                list_of_update_tickets.append(ticket)

        # Variables to be used for end summary
        processed = 0
        succeeded = 0
        failed = 0

        for ticket in list_of_update_tickets:
            status = update_field(alchemist, ticket)

            if status == 1:
                processed += 1
                succeeded += 1
            else:
                processed += 1
                failed += 1

        engine.dispose()
        print("\nDone iterating through tickets.")
        if succeeded > 0:
            print(f"{succeeded} / {processed} tickets successfully handled.")
        if failed > 0:
            print(f"{failed} / {processed} tickets failed to be handled.")
Example #5
def test_build_complete_config_1(self):
    """Confirm that ConfigParser is constructed with default None
    using valid file."""
    parser1 = build_parser(mysql=True)
    create_config_file(parser1, config_filepath)
    parser2 = configfile.build_complete_config(config_filepath)
    with self.subTest():
        self.assertEqual(parser2["mysql"]["user"], USER)
    with self.subTest():
        self.assertIsNone(parser2["ncbi"]["tool"])
    with self.subTest():
        self.assertIsNone(parser2["ncbi"]["email"])
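build_parser and create_config_file are test helpers not shown in this excerpt. A sketch under the assumption that they simply seed a ConfigParser with test credentials and write it to disk (the credential values here are placeholders; the test uses module constants such as USER):

import configparser

def build_parser(mysql=False):
    parser = configparser.ConfigParser(allow_no_value=True)
    if mysql:
        # USER/PASSWORD would be test-module constants.
        parser["mysql"] = {"user": "test_user", "password": "test_pass"}
    return parser

def create_config_file(parser, filepath):
    with open(filepath, "w") as config_handle:
        parser.write(config_handle)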
Example #6
def main(unparsed_args_list):
    """Run main get_gb_records pipeline."""
    # Parse command line arguments
    args = parse_args(unparsed_args_list)

    # Create config object with data obtained from file and/or defaults.
    config = configfile.build_complete_config(args.config_file)

    alchemist = pipelines_basic.build_alchemist(args.database, config=config)
    mysqldb.check_schema_compatibility(alchemist.engine,
                                       "the get_gb_records pipeline")

    values = pipelines_basic.parse_value_input(args.input)

    execute_get_gb_records(alchemist, args.file_type,
                           folder_path=args.folder_path,
                           folder_name=args.folder_name,
                           config=config,
                           values=values, verbose=args.verbose,
                           filters=args.filters, groups=args.groups)
Example #7
def main(unparsed_args_list):
    """Uses parsed args to run the entirety of the file export pipeline.

    :param unparsed_args_list: Input a list of command line args.
    :type unparsed_args_list: list[str]
    """
    # Returns after printing appropriate error message from parsing/connecting.
    args = parse_export(unparsed_args_list)

    config = configfile.build_complete_config(args.config_file)

    alchemist = pipelines_basic.build_alchemist(args.database, config=config)

    # Exporting as a SQL file is not restricted by schema version.
    if args.pipeline != "sql":
        mysqldb.check_schema_compatibility(alchemist.engine, "export")

    values = None
    if args.pipeline in FILTERABLE_PIPELINES:
        values = pipelines_basic.parse_value_input(args.input)
        if not values:
            values = None

    if args.pipeline not in PIPELINES:
        print("ABORTED EXPORT: Unknown pipeline option discrepency.\n"
              "Pipeline parsed from command line args is not supported")
        sys.exit(1)

    if args.pipeline != "I":
        execute_export(alchemist, args.pipeline, folder_path=args.folder_path,
                       folder_name=args.folder_name, table=args.table,
                       values=values, filters=args.filters, groups=args.groups,
                       sort=args.sort, include_columns=args.include_columns,
                       exclude_columns=args.exclude_columns,
                       sequence_columns=args.sequence_columns,
                       raw_bytes=args.raw_bytes,
                       concatenate=args.concatenate, db_name=args.db_name,
                       verbose=args.verbose, dump=args.dump, force=args.force,
                       threads=args.number_processes, phams_out=args.phams_out)
    else:
        pass
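
PIPELINES and FILTERABLE_PIPELINES are module-level constants whose actual values are not shown in this excerpt. Purely illustrative placeholders, consistent only with how the code above uses them ("sql" skips the schema check, "I" skips execute_export, the filterable ones accept value input):

# Hypothetical values -- the real tuples live in the export pipeline module.
PIPELINES = ("sql", "csv", "gb", "fasta", "I")
FILTERABLE_PIPELINES = ("csv", "gb", "fasta")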
Example #8
def main(unparsed_args):
    """Uses parsed args to run the entirety of the find primers pipeline.

    :param unparsed_args: Input a list of command line args.
    :type unparsed_args: list[str]
    """
    args = parse_find_primers(unparsed_args)

    config = configfile.build_complete_config(args.config_file)

    alchemist = pipelines_basic.build_alchemist(args.database, config=config)

    values = pipelines_basic.parse_value_input(args.input)

    execute_find_primers(alchemist,
                         folder_path=args.folder_path,
                         folder_name=args.folder_name,
                         values=values,
                         filters=args.filters,
                         groups=args.groups,
                         verbose=args.verbose,
                         threads=args.threads,
                         prc=args.prc,
                         minD=args.minD,
                         maxD=args.maxD,
                         hpn_min=args.hpn_min,
                         ho_min=args.ho_min,
                         het_min=args.het_min,
                         GC_max=args.GC,
                         len_oligomer=args.oligomer_length,
                         tm_min=args.tm_min,
                         tm_max=args.tm_max,
                         tm_gap=args.tm_gap,
                         ta_min=args.ta_min,
                         ta_max=args.ta_max,
                         mode=args.mode,
                         soft_cap=args.soft_cap,
                         phams_in=args.phams_in,
                         fwd_in=args.fwd_in,
                         rvs_in=args.rvs_in)
Example #9
def main(unparsed_args_list):
    """Uses parsed args to run the entirety of the pham_finder pipeline.

    :param unparsed_args_list: Input a list of command line args.
    :type unparsed_args_list: list[str]
    """
    args = parse_pham_finder(unparsed_args_list)

    config = configfile.build_complete_config(args.config_file)

    alchemist = pipelines_basic.build_alchemist(None, ask_database=False,
                                                config=config)

    values = None
    if args.input:
        values = pipelines_basic.parse_value_input(args.input)

    execute_pham_finder(alchemist, args.folder_path, args.folder_name,
                        args.adatabase, args.bdatabase, values=values,
                        filters=args.filters, groups=args.groups,
                        sort=args.sort, show_per=args.show_percentages,
                        use_locus=args.use_locus, verbose=args.verbose)
Example #10
def main(unparsed_args_list):
    """Uses parsed args to run the entirety of the build_pan pipeline.

    :param unparsed_args_list: Input a list of command line args.
    :type unparsed_args_list: list[str]
    """
    args = parse_build_pan(unparsed_args_list)

    config = configfile.build_complete_config(args.config_file)

    alchemist = pipelines_basic.build_alchemist(args.database, config=config)

    values = pipelines_basic.parse_value_input(args.input)

    execute_build_pan(alchemist,
                      hhdb_path=args.hhsuite_database,
                      folder_path=args.folder_path,
                      folder_name=args.folder_name,
                      values=values,
                      verbose=args.verbose,
                      filters=args.filters,
                      groups=args.groups,
                      threads=args.number_threads,
                      M=args.min_percent_gaps,
                      aD=args.avg_distance,
                      mD=args.min_distance,
                      B=args.DB_stiffness,
                      PANgraph_out=args.PANgraph_out)
Example #11
def main(unparsed_args_list):
    """Uses parsed args to run the entirety of the cluster_db pipeline.

    :param unparsed_args_list: Input a list of command line args.
    :type unparsed_args_list: list[str]
    """
    args = parse_cluster_db(unparsed_args_list)

    config = configfile.build_complete_config(args.config_file)

    alchemist = pipelines_basic.build_alchemist(args.database, config=config)
    values = pipelines_basic.parse_value_input(args.input)

    execute_cluster_db(alchemist,
                       folder_path=args.folder_path,
                       folder_name=args.folder_name,
                       values=values, verbose=args.verbose,
                       filters=args.filters, groups=args.groups,
                       threads=args.number_threads, kmer=args.kmer_size,
                       sketch=args.sketch_size,
                       gcs=args.gene_content_similarity_min,
                       ani=args.average_nucleotide_identity_min,
                       gcsmax=args.gene_content_similarity_max,
                       animax=args.average_nucleotide_identity_max,
                       gcsS=args.gcsS, gcsM=args.gcsM, aniS=args.aniS,
                       aniM=args.aniM, evaluate=args.dump_evaluation,
                       mat_out=args.distmat_out, subcluster=args.subcluster,
                       cluster_prefix=args.cluster_prefix)
Example #12
def main(unparsed_args_list):
    """Uses parsed args to run the entirety of the revise pipeline.

    :param unparsed_args_list: Input a list of command line args.
    :type unparsed_args_list: list[str]
    """
    args = parse_revise(unparsed_args_list)

    config = configfile.build_complete_config(args.config_file)

    alchemist = pipelines_basic.build_alchemist(args.database, config=config)

    if args.pipeline == "local":
        execute_local_revise(alchemist,
                             args.revisions_file,
                             folder_path=args.folder_path,
                             folder_name=args.folder_name,
                             config=config,
                             input_type=args.input_type,
                             output_type=args.output_type,
                             filters=args.filters,
                             groups=args.groups,
                             verbose=args.verbose,
                             force=args.force,
                             production=args.production)
    elif args.pipeline == "remote":
        values = pipelines_basic.parse_value_input(args.input)
        execute_remote_revise(alchemist,
                              folder_path=args.folder_path,
                              folder_name=args.folder_name,
                              config=config,
                              values=values,
                              filters=args.filters,
                              groups=args.groups,
                              verbose=args.verbose,
                              output_type=args.output_type,
                              force=args.force)
Example #13
def main(unparsed_args_list):
    """Uses parsed args to run the entirety of the pham_review pipeline.

    :param unparsed_args_list: Input a list of command line args.
    :type unparsed_args_list: list[str]
    """
    args = parse_pham_review(unparsed_args_list)

    config = configfile.build_complete_config(args.config_file)

    alchemist = pipelines_basic.build_alchemist(args.database, config=config)

    values = pipelines_basic.parse_value_input(args.input)

    if not args.all_reports:
        gr_reports = args.gene_reports
        s_report = args.summary_report
        psr_reports = args.pham_summary_reports
    else:
        gr_reports = True
        s_report = True
        psr_reports = True

    execute_pham_review(alchemist,
                        folder_path=args.folder_path,
                        folder_name=args.folder_name,
                        no_review=args.no_review,
                        values=values,
                        force=args.force,
                        filters=args.filters,
                        groups=args.groups,
                        sort=args.sort,
                        s_report=s_report,
                        gr_reports=gr_reports,
                        production=args.production,
                        psr_reports=psr_reports,
                        verbose=args.verbose)
Example #14
def main(unparsed_args_list):
    """Run main freeze database pipeline."""
    args = parse_args(unparsed_args_list)
    ref_database = args.database
    reset = args.reset
    new_database = args.new_database_name
    prefix = args.prefix

    # Filters input: phage.Status=draft AND phage.HostGenus=Mycobacterium
    # Args structure: [['phage.Status=draft'], ['phage.HostGenus=Mycobacterium']]
    filters = args.filters

    # Create config object with data obtained from file and/or defaults.
    config = configfile.build_complete_config(args.config_file)
    mysql_creds = config["mysql"]

    # Verify database connection and schema compatibility.
    print("Connecting to the MySQL database...")
    alchemist1 = AlchemyHandler(database=ref_database,
                                username=mysql_creds["user"],
                                password=mysql_creds["password"])
    alchemist1.connect(pipeline=True)
    engine1 = alchemist1.engine
    mysqldb.check_schema_compatibility(engine1, "the freeze pipeline")

    # Get SQLAlchemy metadata Table object
    # table_obj.primary_key.columns is a
    # SQLAlchemy ColumnCollection iterable object
    # Set primary key = 'phage.PhageID'
    alchemist1.build_metadata()
    table = querying.get_table(alchemist1.metadata, TARGET_TABLE)
    for column in table.primary_key.columns:
        primary_key = column

    # Create filter object and then add command line filter strings
    db_filter = Filter(alchemist=alchemist1, key=primary_key)
    db_filter.values = []

    # Attempt to add filters and exit if needed.
    add_filters(db_filter, filters)

    # Performs the query
    db_filter.update()

    # db_filter.values now contains list of PhageIDs that pass the filters.
    # Get the number of genomes that will be retained and build the
    # MYSQL DELETE statement.
    keep_set = set(db_filter.values)
    delete_stmt = construct_delete_stmt(TARGET_TABLE, primary_key, keep_set)
    count_query = construct_count_query(TARGET_TABLE, primary_key, keep_set)
    phage_count = mysqldb_basic.scalar(alchemist1.engine, count_query)

    # Determine the name of the new database.
    if new_database is None:
        if prefix is None:
            prefix = get_prefix()
        new_database = f"{prefix}_{phage_count}"

    # Create the new database, but prevent overwriting of current database.
    if engine1.url.database != new_database:
        result = mysqldb_basic.drop_create_db(engine1, new_database)
    else:
        print(
            "Error: names of the reference and frozen databases are the same.")
        print("No database will be created.")
        result = 1

    # Copy database.
    if result == 0:
        print(f"Reference database: {ref_database}")
        print(f"New database: {new_database}")
        result = mysqldb_basic.copy_db(engine1, new_database)
        if result == 0:
            print(f"Deleting genomes...")
            alchemist2 = AlchemyHandler(database=new_database,
                                        username=engine1.url.username,
                                        password=engine1.url.password)
            alchemist2.connect(pipeline=True)
            engine2 = alchemist2.engine
            engine2.execute(delete_stmt)
            if reset:
                engine2.execute(RESET_VERSION)

            # Close up all connections in the connection pool.
            engine2.dispose()
        else:
            print("Unable to copy the database.")
        # Close up all connections in the connection pool.
        engine1.dispose()
    else:
        print(f"Error creating new database: {new_database}.")
    print("Freeze database script completed.")
Example #15
def main(argument_list):
    """
    :param argument_list:
    :return:
    """
    # Setup argument parser
    cdd_parser = setup_argparser()

    # Use argument parser to parse argument_list
    args = cdd_parser.parse_args(argument_list[2:])

    # Store arguments in more easily accessible variables
    database = args.database
    cdd_dir = expand_path(args.cdd)
    cdd_name = learn_cdd_name(cdd_dir)
    threads = args.threads
    evalue = args.evalue
    rpsblast = args.rpsblast
    tmp_dir = args.tmp_dir
    output_folder = args.output_folder
    reset = args.reset
    batch_size = args.batch_size

    # Create config object with data obtained from file and/or defaults.
    config = configfile.build_complete_config(args.config_file)
    mysql_creds = config["mysql"]

    # Set up directory.
    output_folder = basic.set_path(output_folder, kind="dir", expect=True)
    results_folder = pathlib.Path(RESULTS_FOLDER)
    results_path = basic.make_new_dir(output_folder, results_folder,
                                      attempt=50)
    if results_path is None:
        print("Unable to create output_folder.")
        sys.exit(1)

    log_file = pathlib.Path(results_path, MAIN_LOG_FILE)

    # Set up root logger.
    logging.basicConfig(filename=log_file, filemode="w", level=logging.DEBUG,
                        format="pdm_utils find_domains: %(levelname)s: %(message)s")
    logger.info(f"pdm_utils version: {VERSION}")
    logger.info(f"CDD run date: {constants.CURRENT_DATE}")
    logger.info(f"Command line arguments: {' '.join(argument_list)}")
    logger.info(f"Results directory: {results_path}")

    # Early exit if either 1) cdd_name == "" or 2) no rpsblast given and we are
    # unable to find one
    if cdd_name == "":
        msg = (f"Unable to learn CDD database name. Make sure the files in "
              f"{cdd_dir} all have the same basename.")
        logger.error(msg)
        print(msg)
        return

    # Get the rpsblast command and path.
    if rpsblast == "":
        command = get_rpsblast_command()
        rpsblast = get_rpsblast_path(command)

    # Verify database connection and schema compatibility.
    alchemist = AlchemyHandler(database=database,
                               username=mysql_creds["user"],
                               password=mysql_creds["password"])
    alchemist.connect(pipeline=True)
    engine = alchemist.engine
    logger.info(f"Connected to database: {database}.")
    mysqldb.check_schema_compatibility(engine, "the find_domains pipeline")
    logger.info(f"Schema version is compatible.")
    logger.info("Command line arguments verified.")

    if reset:
        logger.info("Clearing all domain data currently in the database.")
        clear_domain_data(engine)

    # Get gene data that needs to be processed
    # in dict format where key = column name, value = stored value.
    cdd_genes = mysqldb_basic.query_dict_list(engine, GET_GENES_FOR_CDD)
    msg = f"{len(cdd_genes)} genes to search for conserved domains..."
    logger.info(msg)
    print(msg)

    # Only run the pipeline if there are genes returned that need it
    if len(cdd_genes) > 0:

        log_gene_ids(cdd_genes)
        make_tempdir(tmp_dir)

        # Identify unique translations to process mapped to GeneIDs.
        cds_trans_dict = create_cds_translation_dict(cdd_genes)

        unique_trans = list(cds_trans_dict.keys())
        msg = (f"{len(unique_trans)} unique translations "
               "to search for conserved domains...")
        logger.info(msg)
        print(msg)

        # Process translations in batches. Otherwise, searching could take
        # so long that MySQL connection closes resulting in 1 or more
        # transaction errors.
        batch_indices = basic.create_indices(unique_trans, batch_size)
        total_rolled_back = 0
        for indices in batch_indices:
            start = indices[0]
            stop = indices[1]
            msg = f"Processing translations {start + 1} to {stop}..."
            logger.info(msg)
            print(msg)
            sublist = unique_trans[start:stop]
            batch_rolled_back = search_translations(
                                    rpsblast, cdd_name, tmp_dir, evalue,
                                    threads, engine, sublist, cds_trans_dict)
            total_rolled_back += batch_rolled_back

        search_summary(total_rolled_back)
        engine.dispose()

    return
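
basic.create_indices drives the batching; judging from how start and stop are used above, it returns (start, stop) pairs that slice the list into batch_size chunks. A sketch of that behavior (an assumption about the helper, not its actual source):

def create_indices(items, batch_size):
    # Each pair slices items[start:stop]; the last chunk may be shorter.
    return [(start, min(start + batch_size, len(items)))
            for start in range(0, len(items), batch_size)]

# create_indices(list(range(10)), 4) -> [(0, 4), (4, 8), (8, 10)]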
Example #16
def main(unparsed_args_list):
    """Run main conversion pipeline."""
    # Parse command line arguments
    args = parse_args(unparsed_args_list)
    config = configfile.build_complete_config(args.config_file)
    mysql_creds = config["mysql"]
    alchemist1 = AlchemyHandler(database=args.database,
                                username=mysql_creds["user"],
                                password=mysql_creds["password"])
    alchemist1.connect(pipeline=True)
    engine1 = alchemist1.engine

    target = args.schema_version
    actual = mysqldb.get_schema_version(engine1)
    steps, direction = get_conversion_direction(actual, target)

    # Iterate through the list of versions and implement SQL files.
    if direction == "none":
        if args.verbose:
            print("No schema conversion is needed.")
        convert = False
    else:
        convert = True

    if convert:
        if (args.new_database_name is not None and
                args.new_database_name != args.database):
            result = mysqldb_basic.drop_create_db(engine1, args.new_database_name)
            if result == 0:
                result = mysqldb_basic.copy_db(engine1, args.new_database_name)
                if result == 0:
                    # Create a new connection to the new database.
                    alchemist2 = AlchemyHandler(database=args.new_database_name,
                                                username=engine1.url.username,
                                                password=engine1.url.password)
                    alchemist2.connect(pipeline=True)
                    engine2 = alchemist2.engine

                else:
                    print("Error: Unable to copy the database for conversion.")
                    convert = False
            else:
                print("Error: Unable to create the new database for conversion.")
                convert = False
        else:
            engine2 = engine1

        if convert:
            stop_step, summary = convert_schema(engine2, actual, direction,
                                                steps, verbose=args.verbose)
            engine2.dispose()
            if stop_step == target:
                if args.verbose:
                    print("\n\nThe database schema conversion was successful.")
            else:
                print("\n\nError: "
                      "The database schema conversion was not successful. "
                      f"Unable to proceed past schema version {stop_step}.")
            if args.verbose:
                print_summary(summary)
    engine1.dispose()
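
get_conversion_direction is not shown here; from its call site it must return the step sequence plus a direction string (only "none" is visible in the code). A sketch under the assumptions that schema versions are consecutive integers and the other direction labels are "upgrade"/"downgrade":

def get_conversion_direction(actual, target):
    # Returns (steps, direction); direction labels other than "none"
    # are assumptions for this sketch.
    if actual == target:
        return [], "none"
    if actual < target:
        return list(range(actual + 1, target + 1)), "upgrade"
    return list(range(actual - 1, target - 1, -1)), "downgrade"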
Example #17
def main(unparsed_args_list):
    """Run main retrieve_updates pipeline."""
    # Parse command line arguments
    args = parse_args(unparsed_args_list)
    force = args.force_download
    args.output_folder = basic.set_path(args.output_folder,
                                        kind="dir",
                                        expect=True)
    working_dir = pathlib.Path(RESULTS_FOLDER)
    working_path = basic.make_new_dir(args.output_folder,
                                      working_dir,
                                      attempt=50)

    if working_path is None:
        print(f"Invalid working directory '{working_dir}'")
        sys.exit(1)

    # Create config object with data obtained from file and/or defaults.
    config = configfile.build_complete_config(args.config_file)
    mysql_creds = config["mysql"]
    ncbi_creds = config["ncbi"]

    # Verify database connection and schema compatibility.
    print("Preparing genome data sets from the MySQL database...")
    alchemist = AlchemyHandler(database=args.database,
                               username=mysql_creds["user"],
                               password=mysql_creds["password"])
    alchemist.connect(pipeline=True)
    engine = alchemist.engine
    mysqldb.check_schema_compatibility(engine, "the get_data pipeline")

    # Get existing data from MySQL to determine what needs to be updated.
    query = ("SELECT PhageID, Name, HostGenus, Status, Cluster, "
             "DateLastModified, Accession, RetrieveRecord, Subcluster, "
             "AnnotationAuthor FROM phage")

    mysqldb_genome_list = mysqldb.parse_genome_data(engine=engine,
                                                    phage_query=query,
                                                    gnm_type="mysqldb")
    engine.dispose()
    mysqldb_genome_dict = {}
    for gnm in mysqldb_genome_list:
        # With default date, the date of all records retrieved will be newer.
        if force:
            gnm.date = constants.EMPTY_DATE
        mysqldb_genome_dict[gnm.id] = gnm

    # Get data from PhagesDB
    if args.updates or args.final or args.draft:
        print("Retrieving data from PhagesDB...")
        phagesdb_phages = phagesdb.get_phagesdb_data(constants.API_SEQUENCED)
        phagesdb_phages_dict = basic.convert_list_to_dict(
            phagesdb_phages, "phage_name")
        phagesdb_genome_dict = phagesdb.parse_genomes_dict(
            phagesdb_phages_dict, gnm_type="phagesdb", seq=False)

        # Exit if all phage data wasn't retrieved.
        if len(phagesdb_genome_dict) == 0:
            sys.exit(1)

        # Returns a list of tuples.
        tup = match_genomes(mysqldb_genome_dict, phagesdb_genome_dict)
        matched_genomes = tup[0]
        unmatched_phagesdb_ids = tup[1]

    if args.updates:
        get_update_data(working_path, matched_genomes)
    if args.final:
        get_final_data(working_path, matched_genomes)
    if args.genbank:
        get_genbank_data(working_path,
                         mysqldb_genome_dict,
                         ncbi_creds,
                         args.genbank_results,
                         force=force)
    if args.draft:
        if force:
            # Add all draft genomes currently in database to the list of
            # draft genomes to be downloaded.
            drafts = get_matched_drafts(matched_genomes)
            unmatched_phagesdb_ids |= drafts
        get_draft_data(working_path, unmatched_phagesdb_ids)
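
basic.convert_list_to_dict re-keys the PhagesDB records by phage_name before matching. A sketch of that helper under the obvious assumption that it maps one field's value to each record:

def convert_list_to_dict(records, key_field):
    # Map each record's key_field value to the record itself.
    return {record[key_field]: record for record in records}

# convert_list_to_dict([{"phage_name": "Trixie"}], "phage_name")
# -> {"Trixie": {"phage_name": "Trixie"}}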
Example #18
def main(unparsed_args):
    """
    Driver function for the push pipeline.

    :param unparsed_args: the command-line arguments given to this
                          pipeline's caller (likely pdm_utils.__main__)
    :type unparsed_args: list
    """
    # Parse the command line args with argparse
    # Assumed command line arg structure:
    # python3 -m pdm_utils.run <pipeline> <additional args...>
    # sys.argv:      [0]            [1]         [2...]
    args = parse_args(unparsed_args[2:])

    # Parse config file if one was given
    config = configfile.build_complete_config(args.config_file)
    server_host = config["upload_server"]["host"]
    remote_dir = config["upload_server"]["dest"]
    user = config["upload_server"]["user"]
    password = config["upload_server"]["password"]

    # Command line hostname and destination override config file defaults
    if args.server_host is not None:
        server_host = args.server_host
    if args.remote_directory is not None:
        remote_dir = args.remote_directory
    key_file = args.key_file

    # Can't upload files to unknown host or destination
    if server_host is None or remote_dir is None:
        print("No hostname and/or remote directory provided. Unable "
              "to upload file(s).")
        sys.exit(1)

    # Get the list of files to upload
    file_list = get_files(args.directory, args.file, ignore={".DS_Store"})

    if len(file_list) > 0:
        if args.log_file:
            paramiko.util.log_to_file(args.log_file)

        # Keep track of failed uploads
        failures = list()

        # Setup paramiko connection to server
        with paramiko.Transport(server_host) as transport:
            # Context manager so Transport will be closed automatically
            if user and key_file:
                # Username and key-file are the preferred authentication method
                try:
                    key = paramiko.RSAKey.from_private_key_file(key_file)
                except paramiko.ssh_exception.SSHException:
                    print(f"'{key_file}' is not a valid RSA key file")
                    sys.exit(1)
                try:
                    transport.connect(username=user, pkey=key)
                except paramiko.ssh_exception.AuthenticationException:
                    print(f"Authentication failed with user '{user}' and "
                          f"key-file '{key_file}'")
                    sys.exit(1)
            elif user and password:
                # Username and password from config are the next preferred method
                try:
                    transport.connect(username=user, password=password)
                except paramiko.ssh_exception.AuthenticationException:
                    p = f"{password[0]}{'*' * (len(password)-2)}{password[-1]}"
                    print(f"Authentication failed with user '{user}' and "
                          f"password '{p}'")
                    sys.exit(1)
            else:
                # Finally, get username and password from command line
                user = getpass.getpass(f"Enter username for {server_host}: ")
                password = getpass.getpass(f"Enter password for "
                                           f"{user}@{server_host}: ")
                try:
                    transport.connect(username=user, password=password)
                except paramiko.ssh_exception.AuthenticationException:
                    p = f"{password[0]}{'*' * (len(password)-2)}{password[-1]}"
                    print(f"Authentication failed with user '{user}' and "
                          f"password '{p}'")
                    sys.exit(1)
            with paramiko.SFTPClient.from_transport(transport) as sftp_client:
                # Context manager so SFTPClient will be closed automatically
                for local_file in file_list:
                    # sftp_client.put requires remote filename not remote dir
                    remote_file = remote_dir.joinpath(local_file.name)
                    try:
                        print(f"Uploading {str(local_file)}...")
                        sftp_client.put(str(local_file), str(remote_file))
                    except OSError:
                        failures.append(local_file)
        for file in failures:
            print(f"Could not upload {str(file)}")
    else:
        print("No files to upload")