Python examples of pyani.pyani_files.get_fasta_files

Example #1

0

Show file

File: test_anim.py Project: HuttonICS/pyani

 def test_process_deltadir(self):
     """Process a directory of .delta files into ANIResults.

     Sequence lengths are read from the FASTA files found in
     ``self.seqdir`` and passed, together with ``self.deltadir``, to
     ``anim.process_deltadir``. The percentage-identity table of the
     result is then compared with ``self.df_pid``.
     """
     seqfiles = pyani_files.get_fasta_files(self.seqdir)
     orglengths = pyani_files.get_sequence_lengths(seqfiles)
     result = anim.process_deltadir(self.deltadir, orglengths)
     # Sort both frames on columns and then rows so the comparison does not
     # depend on label order. ``axis`` is given by keyword because
     # pandas >= 2.0 no longer accepts it positionally.
     assert_frame_equal(
         result.percentage_identity.sort_index(axis=1).sort_index(),
         self.df_pid.sort_index(axis=1).sort_index())

Example #2

0

Show file

def test_aniblastall_concordance():
    """Test concordance of ANIblastall method with JSpecies output.

    Fragments the input sequences, runs the BLAST job graph, and compares
    the resulting percentage identities (scaled by 100) with the JSpecies
    'ANIb' table. The three tables (ours, JSpecies, difference) are written
    to the output directory for reference, and the largest absolute
    difference must be below ANIB_THRESHOLD.
    """
    # Make/check output directory
    mode = "ANIblastall"
    outdirname = delete_and_remake_outdir(mode)

    # Get dataframes of JSpecies output
    aniblastall_jspecies = parse_table(JSPECIES_OUTFILE, 'ANIb')

    # Identify our input files, and the total lengths of each organism seq
    infiles = pyani_files.get_fasta_files(INDIRNAME)
    org_lengths = pyani_files.get_sequence_lengths(infiles)

    # Test ANIblastall concordance:
    # Make fragments
    fragfiles, fraglengths = anib.fragment_fasta_files(infiles, outdirname,
                                                       pyani_config.FRAGSIZE)

    # Build jobgraph
    jobgraph = anib.make_job_graph(
        infiles, fragfiles,
        anib.make_blastcmd_builder("ANIblastall", outdirname))
    print("\nJobgraph:\n", jobgraph)
    print("\nJob 0:\n", jobgraph[0].script)

    # Run jobgraph with multiprocessing
    run_dependency_graph(jobgraph)
    print("Ran multiprocessing jobs")

    # Process BLAST; the pid data is the percentage_identity attribute
    aniblastall_data = anib.process_blast(outdirname,
                                          org_lengths,
                                          fraglengths,
                                          mode="ANIblastall")
    aniblastall_pid = \
        aniblastall_data.percentage_identity.sort_index(axis=0).\
        sort_index(axis=1) * 100.

    # Element-wise difference on the raw arrays; labels are taken from our
    # own table. ``.values`` replaces ``as_matrix()``, which was removed in
    # pandas 1.0.
    index, columns = aniblastall_pid.index, aniblastall_pid.columns
    diffmat = aniblastall_pid.values - aniblastall_jspecies.values
    aniblastall_diff = pd.DataFrame(diffmat, index=index, columns=columns)

    # Write dataframes to file, for reference
    aniblastall_pid.to_csv(os.path.join(outdirname, 'ANIblastall_pid.tab'),
                           sep='\t')
    aniblastall_jspecies.to_csv(os.path.join(outdirname,
                                             'ANIblastall_jspecies.tab'),
                                sep='\t')
    aniblastall_diff.to_csv(os.path.join(outdirname, 'ANIblastall_diff.tab'),
                            sep='\t')
    print("ANIblastall concordance test output placed in %s" % outdirname)
    print("ANIblastall PID:\n", aniblastall_pid)
    print("ANIblastall JSpecies:\n", aniblastall_jspecies)
    print("ANIblastall diff:\n", aniblastall_diff)

    # We'd like the absolute difference reported to be < ANIB_THRESHOLD
    max_diff = aniblastall_diff.abs().values.max()
    print("Maximum difference for ANIblastall: %e" % max_diff)
    assert_less(max_diff, ANIB_THRESHOLD)

Example #3

0

Show file

File: test_anim.py Project: zwets/pyani

 def test_process_deltadir(self):
     """Process a directory of .delta files into ANIResults.

     Sequence lengths are read from the FASTA files found in
     ``self.seqdir`` and passed, together with ``self.deltadir``, to
     ``anim.process_deltadir``. The percentage-identity table of the
     result is then compared with ``self.df_pid``.
     """
     seqfiles = pyani_files.get_fasta_files(self.seqdir)
     orglengths = pyani_files.get_sequence_lengths(seqfiles)
     result = anim.process_deltadir(self.deltadir, orglengths)
     # Sort on columns and then rows so label order cannot affect the
     # comparison. ``axis`` is passed by keyword: pandas >= 2.0 rejects a
     # positional axis argument to sort_index.
     assert_frame_equal(
         result.percentage_identity.sort_index(axis=1).sort_index(),
         self.df_pid.sort_index(axis=1).sort_index())

Example #4

0

Show file

def test_deltadir_parsing(delta_output_dir):
    """Process test directory of .delta files into ANIResults.

    :param delta_output_dir:  object providing the ``seqdir``, ``deltadir``
        and ``deltaresult`` attributes read below

    The percentage-identity table computed from the .delta files must equal
    ``delta_output_dir.deltaresult`` once both are sorted on both axes.
    """
    seqfiles = pyani_files.get_fasta_files(delta_output_dir.seqdir)
    orglengths = pyani_files.get_sequence_lengths(seqfiles)
    result = anim.process_deltadir(delta_output_dir.deltadir, orglengths)
    # ``axis`` is passed by keyword: pandas >= 2.0 rejects a positional
    # axis argument to sort_index.
    assert_frame_equal(
        result.percentage_identity.sort_index(axis=1).sort_index(),
        delta_output_dir.deltaresult.sort_index(axis=1).sort_index(),
    )

Example #5

0

Show file

File: test_concordance.py Project: brwnj/pyani

def test_anib_concordance():
    """Test concordance of ANIb method with JSpecies output.

    This may take some time. Please be patient.

    Fragments the input sequences, builds BLAST databases, runs pairwise
    BLASTN, and compares the resulting percentage identities (scaled by
    100) with the JSpecies 'ANIb' table. The largest absolute difference
    must be below ANIB_THRESHOLD.
    """
    # Make/check output directory
    mode = "ANIb"
    outdirname = make_outdir(mode)

    # Get dataframes of JSpecies output
    anib_jspecies = parse_table(JSPECIES_OUTFILE, 'ANIb')

    # Identify our input files, and the total lengths of each organism seq
    infiles = pyani_files.get_fasta_files(INDIRNAME)
    org_lengths = pyani_files.get_sequence_lengths(infiles)

    # Test ANIb concordance:
    # Make fragments
    fragfiles, fraglengths = anib.fragment_FASTA_files(infiles, outdirname,
                                                       pyani_config.FRAGSIZE)
    # Build databases
    cmdlist = anib.generate_blastdb_commands(infiles, outdirname,
                                             pyani_config.MAKEBLASTDB_DEFAULT,
                                             mode="ANIb")
    multiprocessing_run(cmdlist)
    # Run pairwise BLASTN
    cmdlist = anib.generate_blastn_commands(fragfiles, outdirname,
                                            pyani_config.BLASTN_DEFAULT,
                                            mode="ANIb")
    multiprocessing_run(cmdlist, verbose=False)
    # Process BLAST; the pid data is in anib_data[1]
    anib_data = anib.process_blast(outdirname, org_lengths, fraglengths,
                                   mode="ANIb")
    # Sort by row and column labels. sort_index() replaces the removed
    # DataFrame.sort(), which sorted on labels when given no columns.
    anib_pid = anib_data[1].sort_index(axis=0).sort_index(axis=1) * 100.

    # Element-wise difference on the raw arrays; ``.values`` replaces the
    # removed as_matrix().
    index, columns = anib_pid.index, anib_pid.columns
    diffmat = anib_pid.values - anib_jspecies.values
    anib_diff = pd.DataFrame(diffmat, index=index, columns=columns)

    # Write dataframes to file, for reference
    anib_pid.to_csv(os.path.join(outdirname, 'ANIb_pid.tab'), sep='\t')
    anib_jspecies.to_csv(os.path.join(outdirname, 'ANIb_jspecies.tab'),
                         sep='\t')
    anib_diff.to_csv(os.path.join(outdirname, 'ANIb_diff.tab'), sep='\t')
    # Single-argument print(...) produces the same output under Python 2
    # and Python 3, so every message is pre-formatted into one string.
    print("ANIb concordance test output placed in %s" % outdirname)
    print("%s %s %s" % (anib_pid, anib_jspecies, anib_diff))

    # We'd like the absolute difference reported to be < ANIB_THRESHOLD
    max_diff = anib_diff.abs().values.max()
    print("Maximum difference for ANIb: %e" % max_diff)
    assert_less(max_diff, ANIB_THRESHOLD)

Example #6

0

Show file

def test_anib_concordance():
    """Test concordance of ANIb method with JSpecies output.

    This may take some time. Please be patient.

    The input sequences are fragmented, BLAST databases are built, and
    pairwise BLASTN is run. The resulting percentage identities (scaled by
    100) are compared with the JSpecies 'ANIb' table, and the largest
    absolute difference must be below ANIB_THRESHOLD.
    """
    # Make/check output directory
    mode = "ANIb"
    outdirname = make_outdir(mode)

    # Get dataframes of JSpecies output
    anib_jspecies = parse_table(JSPECIES_OUTFILE, 'ANIb')

    # Identify our input files, and the total lengths of each organism seq
    infiles = pyani_files.get_fasta_files(INDIRNAME)
    org_lengths = pyani_files.get_sequence_lengths(infiles)

    # Test ANIb concordance:
    # Make fragments
    fragfiles, fraglengths = anib.fragment_FASTA_files(infiles, outdirname,
                                                       pyani_config.FRAGSIZE)
    # Build databases
    cmdlist = anib.generate_blastdb_commands(infiles,
                                             outdirname,
                                             pyani_config.MAKEBLASTDB_DEFAULT,
                                             mode="ANIb")
    multiprocessing_run(cmdlist)
    # Run pairwise BLASTN
    cmdlist = anib.generate_blastn_commands(fragfiles,
                                            outdirname,
                                            pyani_config.BLASTN_DEFAULT,
                                            mode="ANIb")
    multiprocessing_run(cmdlist, verbose=False)
    # Process BLAST; the pid data is in anib_data[1]
    anib_data = anib.process_blast(outdirname,
                                   org_lengths,
                                   fraglengths,
                                   mode="ANIb")
    # DataFrame.sort() no longer exists in pandas; with no columns given it
    # sorted on labels, which is what sort_index() does.
    anib_pid = anib_data[1].sort_index(axis=0).sort_index(axis=1) * 100.

    # as_matrix() was removed from pandas; ``.values`` yields the same
    # underlying array.
    index, columns = anib_pid.index, anib_pid.columns
    diffmat = anib_pid.values - anib_jspecies.values
    anib_diff = pd.DataFrame(diffmat, index=index, columns=columns)

    # Write dataframes to file, for reference
    anib_pid.to_csv(os.path.join(outdirname, 'ANIb_pid.tab'), sep='\t')
    anib_jspecies.to_csv(os.path.join(outdirname, 'ANIb_jspecies.tab'),
                         sep='\t')
    anib_diff.to_csv(os.path.join(outdirname, 'ANIb_diff.tab'), sep='\t')
    # print(...) with one pre-formatted string behaves identically on
    # Python 2 and Python 3.
    print("ANIb concordance test output placed in %s" % outdirname)
    print("%s %s %s" % (anib_pid, anib_jspecies, anib_diff))

    # We'd like the absolute difference reported to be < ANIB_THRESHOLD
    max_diff = anib_diff.abs().values.max()
    print("Maximum difference for ANIb: %e" % max_diff)
    assert_less(max_diff, ANIB_THRESHOLD)

Example #7

0

Show file

File: ANI_Wrapper_2.py Project: LongTianPy/LIN_proto

def unified_anib(indirname, User_ID):
    """Run the ANIblastall workflow on ``indirname`` and return ``data[1]``.

    Creates the output directory ``indirname + '<User_ID>_out/'``, fragments
    the input FASTA files into it, writes the fragment lengths to
    ``fraglengths.json``, runs the database-building/BLAST job graph with
    multiprocessing, and processes the BLAST output.

    :param indirname:  path of the input directory; the output path is built
        by plain string concatenation, so this presumably needs to end with
        a path separator (confirm against callers)
    :param User_ID:  identifier used in the output directory name and in the
        log file name
    :returns:  element 1 of the value returned by ``anib.process_blast``
    :raises ZeroDivisionError:  re-raised, after logging, when
        ``anib.process_blast`` raises it. Previously this path fell through
        to the ``return`` with ``data`` unbound and failed with an
        UnboundLocalError instead.

    Exits the process with status 1 if the path of any entry in
    ``indirname`` contains a space.
    """
    mode = 'ANIblastall'
    # Build BLAST databases and run pairwise BLASTN
    # Fraglengths does not get reused with BLASTN
    outdirname = indirname + '{0}_out/'.format(User_ID)
    os.mkdir(outdirname)
    # Set the permissions directly instead of through a shell command, so
    # characters in the path are never interpreted by a shell.
    os.chmod(outdirname, 0o777)
    logging.basicConfig(level=logging.DEBUG, filename="/home/linproject/Workspace/LIN_log/logfile_{0}".format(User_ID),
                        filemode="a+", format="%(asctime)-15s %(levelname)-8s %(message)s")
    infiles = pyani_files.get_fasta_files(indirname)
    org_lengths = pyani_files.get_sequence_lengths(infiles)
    fragsize = pyani_config.FRAGSIZE
    for fname in os.listdir(indirname):
        # listdir() returns bare names, so resolve each one inside indirname;
        # abspath() on the bare name would resolve against the current
        # working directory instead.
        if ' ' in os.path.abspath(os.path.join(indirname, fname)):
            logging.error("File or directory '%s' contains whitespace", fname)
            logging.error("This will cause issues with MUMmer and BLAST")
            logging.error("(exiting)")
            sys.exit(1)
    fragfiles, fraglengths = anib.fragment_FASTA_files(infiles, outdirname, fragsize)
    # Export fragment lengths as JSON, in case we re-run BLASTALL with
    # --skip_blastn
    with open(os.path.join(outdirname, 'fraglengths.json'), 'w') as outfile:
        json.dump(fraglengths, outfile)
    # Which executables are we using?
    format_exe = pyani_config.FORMATDB_DEFAULT
    blast_exe = pyani_config.BLASTALL_DEFAULT
    # Run BLAST database-building and executables from a jobgraph
    logging.info("Creating job dependency graph")
    jobgraph = anib.make_job_graph(infiles, fragfiles, outdirname, format_exe, blast_exe, mode)

    logging.info("Running jobs with multiprocessing")
    logging.info("Running job dependency graph")
    cumval = run_mp.run_dependency_graph(jobgraph, verbose=False,
                                         logger=logging)
    if 0 < cumval:
        logging.warning("At least one BLAST run failed. %s may fail.", mode)
    else:
        logging.info("All multiprocessing jobs complete.")

    # Process pairwise BLASTN output
    logging.info("Processing pairwise %s BLAST output.", mode)
    try:
        data = anib.process_blast(outdirname, org_lengths,
                                  fraglengths=fraglengths, mode=mode)
    except ZeroDivisionError:
        logging.error("One or more BLAST output files has a problem.")
        if 0 < cumval:
            logging.error("This is possibly due to BLASTN run failure, "
                          "please investigate")
        else:
            logging.error("This is possibly due to a BLASTN comparison "
                          "being too distant for use.")
        logging.error(last_exception())
        # There is no result to return; propagate the original error rather
        # than failing on the unbound ``data`` below.
        raise
    return data[1]

Example #8

0

Show file

def test_anim_concordance():
    """Test concordance of ANIm method with JSpecies output.

    Runs pairwise NUCmer on the input sequences, processes the .delta files
    in the ``nucmer_output`` subdirectory, and compares the resulting
    percentage identities (scaled by 100) with the JSpecies 'ANIm' table.
    The largest absolute difference must be below ANIM_THRESHOLD.
    """
    # Make/check output directory
    mode = "ANIm"
    outdirname = delete_and_remake_outdir(mode)
    nucmername = os.path.join(outdirname, 'nucmer_output')
    os.makedirs(nucmername, exist_ok=True)

    # Get dataframes of JSpecies output
    anim_jspecies = parse_table(JSPECIES_OUTFILE, 'ANIm')

    # Identify our input files, and the total lengths of each organism seq
    infiles = pyani_files.get_fasta_files(INDIRNAME)
    org_lengths = pyani_files.get_sequence_lengths(infiles)

    # Test ANIm concordance:
    # Run pairwise NUCmer
    cmdlist = anim.generate_nucmer_commands(infiles, outdirname,
                                            pyani_config.NUCMER_DEFAULT)
    print('\n'.join(cmdlist))
    multiprocessing_run(cmdlist)
    # Process .delta files
    results = anim.process_deltadir(nucmername, org_lengths)
    anim_pid = \
        results.percentage_identity.sort_index(axis=0).sort_index(axis=1) * 100.

    print("ANIm data\n", results)

    # Element-wise difference on the raw arrays; ``.values`` replaces
    # as_matrix(), which was removed in pandas 1.0.
    index, columns = anim_pid.index, anim_pid.columns
    diffmat = anim_pid.values - anim_jspecies.values
    anim_diff = pd.DataFrame(diffmat, index=index, columns=columns)

    # Write dataframes to file, for reference
    anim_pid.to_csv(os.path.join(outdirname, 'ANIm_pid.tab'), sep='\t')
    anim_jspecies.to_csv(os.path.join(outdirname, 'ANIm_jspecies.tab'),
                         sep='\t')
    anim_diff.to_csv(os.path.join(outdirname, 'ANIm_diff.tab'), sep='\t')
    print("ANIm concordance test output placed in %s" % outdirname)
    print("ANIm PID\n", anim_pid)
    print("ANIm JSpecies\n", anim_jspecies)
    print("ANIm diff\n", anim_diff)

    # We'd like the absolute difference reported to be < ANIM_THRESHOLD
    max_diff = anim_diff.abs().values.max()
    print("Maximum difference for ANIm: %e" % max_diff)
    assert_less(max_diff, ANIM_THRESHOLD)

Example #9

0

Show file

File: test_concordance.py Project: brwnj/pyani

def test_anim_concordance():
    """Test concordance of ANIm method with JSpecies output.

    Runs pairwise NUCmer on the input sequences, processes the .delta files
    written to the output directory, and compares the resulting percentage
    identities (scaled by 100) with the JSpecies 'ANIm' table. The largest
    absolute difference must be below ANIM_THRESHOLD.
    """
    # Make/check output directory
    mode = "ANIm"
    outdirname = make_outdir(mode)

    # Get dataframes of JSpecies output
    anim_jspecies = parse_table(JSPECIES_OUTFILE, 'ANIm')

    # Identify our input files, and the total lengths of each organism seq
    infiles = pyani_files.get_fasta_files(INDIRNAME)
    org_lengths = pyani_files.get_sequence_lengths(infiles)

    # Test ANIm concordance:
    # Run pairwise NUCmer
    cmdlist = anim.generate_nucmer_commands(infiles, outdirname,
                                            pyani_config.NUCMER_DEFAULT)
    multiprocessing_run(cmdlist, verbose=False)
    # Process .delta files; the pid data is in anim_data[1]
    anim_data = anim.process_deltadir(outdirname, org_lengths)
    # sort_index() replaces the removed DataFrame.sort(), which sorted on
    # labels when given no columns.
    anim_pid = anim_data[1].sort_index(axis=0).sort_index(axis=1) * 100.

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(anim_data)

    # ``.values`` replaces the removed as_matrix().
    index, columns = anim_pid.index, anim_pid.columns
    diffmat = anim_pid.values - anim_jspecies.values
    anim_diff = pd.DataFrame(diffmat, index=index, columns=columns)

    # Write dataframes to file, for reference
    anim_pid.to_csv(os.path.join(outdirname, 'ANIm_pid.tab'), sep='\t')
    anim_jspecies.to_csv(os.path.join(outdirname, 'ANIm_jspecies.tab'),
                         sep='\t')
    anim_diff.to_csv(os.path.join(outdirname, 'ANIm_diff.tab'), sep='\t')
    print("ANIm concordance test output placed in %s" % outdirname)
    print("%s %s %s" % (anim_pid, anim_jspecies, anim_diff))

    # We'd like the absolute difference reported to be < ANIM_THRESHOLD
    max_diff = anim_diff.abs().values.max()
    print("Maximum difference for ANIm: %e" % max_diff)
    assert_less(max_diff, ANIM_THRESHOLD)

Example #10

0

Show file

File: test_concordance.py Project: ohmeta/pyani

 def setUp(self):
     """Prepare the paths, input data and tolerances shared by the tests."""
     # Input, output and target locations: tests/<kind>/concordance
     self.indir, self.outdir, self.tgtdir = (
         os.path.join("tests", subdir, "concordance")
         for subdir in ("test_input", "test_output", "test_targets")
     )
     self.deltadir = os.path.join(self.outdir, "nucmer_output")

     # Input sequence files and their lengths
     self.infiles = pyani_files.get_fasta_files(self.indir)
     self.orglengths = pyani_files.get_sequence_lengths(self.infiles)

     # Reference table the results are compared against
     jspecies_path = os.path.join(self.tgtdir, "jspecies_output.tab")
     self.target = parse_jspecies(jspecies_path)

     self.fragsize = 1020
     self.tolerance = {
         "ANIm": 0.1,
         "ANIb_lo": 5,
         "ANIb_hi": 0.1,
         "ANIblastall": 0.1,
         "TETRA": 0.1,
     }

     # Make sure the output locations exist before any test writes to them
     for directory in (self.outdir, self.deltadir):
         os.makedirs(directory, exist_ok=True)

Example #11

0

Show file

File: test_concordance.py Project: rhysnewell/pyani

 def setUp(self):
     """Set values and parameters for tests.

     Defines the input, output, target and NUCmer-output directories under
     ``tests``, collects the input FASTA files and their sequence lengths,
     loads the JSpecies target table, and creates the output directories.
     """
     testdir = Path("tests")
     self.indir = testdir / "test_input" / "concordance"
     self.outdir = testdir / "test_output" / "concordance"
     self.tgtdir = testdir / "test_targets" / "concordance"
     self.deltadir = self.outdir / "nucmer_output"
     self.infiles = pyani_files.get_fasta_files(self.indir)
     self.orglengths = pyani_files.get_sequence_lengths(self.infiles)
     self.target = parse_jspecies(self.tgtdir / "jspecies_output.tab")
     self.tolerance = {
         "ANIm": 0.1,
         "ANIb_lo": 5,
         "ANIb_hi": 0.1,
         "ANIblastall": 0.1,
         "TETRA": 0.1,
     }
     self.fragsize = 1020
     # parents=True: the output directory is nested two levels below
     # ``tests``, and mkdir() without it fails if tests/test_output does not
     # exist yet (e.g. on a fresh checkout).
     self.outdir.mkdir(parents=True, exist_ok=True)
     self.deltadir.mkdir(parents=True, exist_ok=True)

Example #12

0

Show file

File: test_concordance.py Project: brwnj/pyani

def test_tetra_concordance():
    """Test concordance of TETRA method with JSpecies output.

    Calculates a tetranucleotide Z-score set for every input file,
    correlates them, and compares the correlation table with the JSpecies
    'Tetra' table. The largest absolute difference must be below
    TETRA_THRESHOLD.
    """
    # Make/check output directory
    mode = "TETRA"
    outdirname = make_outdir(mode)

    # Get dataframes of JSpecies output
    tetra_jspecies = parse_table(JSPECIES_OUTFILE, 'Tetra')

    # Identify our input files, and the total lengths of each organism seq
    # NOTE(review): org_lengths is not used below; the call is kept in case
    # the test relies on it to validate the input files - confirm.
    infiles = pyani_files.get_fasta_files(INDIRNAME)
    org_lengths = pyani_files.get_sequence_lengths(infiles)

    # Test TETRA concordance
    tetra_zscores = {}
    for filename in infiles:
        # Key each result by the file name without directory or extension
        org = os.path.splitext(os.path.split(filename)[-1])[0]
        tetra_zscores[org] = tetra.calculate_tetra_zscore(filename)
    tetra_correlations = tetra.calculate_correlations(tetra_zscores)
    index, columns = tetra_correlations.index, tetra_correlations.columns
    # ``.values`` replaces the removed as_matrix().
    tetra_diff = pd.DataFrame(tetra_correlations.values -
                              tetra_jspecies.values,
                              index=index, columns=columns)

    # Write dataframes to file, for reference
    tetra_correlations.to_csv(os.path.join(outdirname,
                                           'tetra_correlations.tab'),
                              sep='\t')
    tetra_jspecies.to_csv(os.path.join(outdirname,
                                       'tetra_jspecies.tab'),
                          sep='\t')
    tetra_diff.to_csv(os.path.join(outdirname,
                                   'tetra_diff.tab'),
                      sep='\t')
    # Single-argument print(...) produces the same output under Python 2
    # and Python 3, so every message is pre-formatted into one string.
    print("TETRA concordance test output placed in %s" % outdirname)
    print("%s %s %s" % (tetra_correlations, tetra_jspecies, tetra_diff))

    # We'd like the absolute difference reported to be < TETRA_THRESHOLD
    max_diff = tetra_diff.abs().values.max()
    print("Maximum difference for TETRA: %e" % max_diff)
    assert_less(max_diff, TETRA_THRESHOLD)

Example #13

0

Show file

def test_tetra_concordance():
    """Test concordance of TETRA method with JSpecies output.

    Calculates a tetranucleotide Z-score set for every input file,
    correlates them, and compares the correlation table with the JSpecies
    'Tetra' table. The largest absolute difference must be below
    TETRA_THRESHOLD.
    """
    # Make/check output directory
    mode = "TETRA"
    outdirname = delete_and_remake_outdir(mode)

    # Get dataframes of JSpecies output
    tetra_jspecies = parse_table(JSPECIES_OUTFILE, 'Tetra')

    # Identify our input files, and the total lengths of each organism seq
    # NOTE(review): org_lengths is not used below; the call is kept in case
    # the test relies on it to validate the input files - confirm.
    infiles = pyani_files.get_fasta_files(INDIRNAME)
    org_lengths = pyani_files.get_sequence_lengths(infiles)

    # Test TETRA concordance
    tetra_zscores = {}
    for filename in infiles:
        # Key each result by the file name without directory or extension
        org = os.path.splitext(os.path.split(filename)[-1])[0]
        tetra_zscores[org] = tetra.calculate_tetra_zscore(filename)
    tetra_correlations = tetra.calculate_correlations(tetra_zscores)
    index, columns = tetra_correlations.index, tetra_correlations.columns
    # ``.values`` replaces as_matrix(), which was removed in pandas 1.0.
    tetra_diff = pd.DataFrame(tetra_correlations.values -
                              tetra_jspecies.values,
                              index=index, columns=columns)

    # Write dataframes to file, for reference
    tetra_correlations.to_csv(os.path.join(outdirname,
                                           'tetra_correlations.tab'),
                              sep='\t')
    tetra_jspecies.to_csv(os.path.join(outdirname, 'tetra_jspecies.tab'),
                          sep='\t')
    tetra_diff.to_csv(os.path.join(outdirname, 'tetra_diff.tab'), sep='\t')
    print("TETRA concordance test output placed in %s" % outdirname)
    print("TETRA correlations:\n", tetra_correlations)
    print("TETRA JSpecies:\n", tetra_jspecies)
    print("TETRA diff:\n", tetra_diff)

    # We'd like the absolute difference reported to be < TETRA_THRESHOLD
    max_diff = tetra_diff.abs().values.max()
    print("Maximum difference for TETRA: %e" % max_diff)
    assert_less(max_diff, TETRA_THRESHOLD)

Example #14

0

Show file

File: average_nucleotide_identity.py Project: jhenriksen-agbiome/pyani

    if args.rerender:
        logger.warning("--rerender option used")
        logger.warning("Producing graphics with no new recalculations")
    else:
        # Have we got a valid scheduler choice?
        schedulers = ["multiprocessing", "SGE"]
        if args.scheduler not in schedulers:
            logger.error("scheduler %s not recognised (exiting)",
                         args.scheduler)
            logger.error("Valid schedulers are: %s", '; '.join(schedulers))
            sys.exit(1)
        logger.info("Using scheduler method: %s", args.scheduler)

        # Get input files
        logger.info("Identifying FASTA files in %s", args.indirname)
        infiles = pyani_files.get_fasta_files(args.indirname)
        logger.info("Input files:\n\t%s", '\n\t'.join(infiles))

        # Are we subsampling? If so, make the selection here
        if args.subsample:
            infiles = subsample_input(infiles)
            logger.info("Sampled input files:\n\t%s", '\n\t'.join(infiles))

        # Get lengths of input sequences
        logger.info("Processing input sequence lengths")
        org_lengths = pyani_files.get_sequence_lengths(infiles)
        logger.info("Sequence lengths:\n" + os.linesep.join(
            ["\t%s: %d" % (k, v) for k, v in list(org_lengths.items())]))

        # Run appropriate method on the contents of the input directory,
        # and write out corresponding results.

Example #15

0

Show file

File: average_nucleotide_identity.py Project: ohmeta/pyani

    if args.rerender:
        logger.warning("--rerender option used")
        logger.warning("Producing graphics with no new recalculations")
    else:
        # Have we got a valid scheduler choice?
        schedulers = ["multiprocessing", "SGE"]
        if args.scheduler not in schedulers:
            logger.error("scheduler %s not recognised (exiting)",
                         args.scheduler)
            logger.error("Valid schedulers are: %s", "; ".join(schedulers))
            sys.exit(1)
        logger.info("Using scheduler method: %s", args.scheduler)

        # Get input files
        logger.info("Identifying FASTA files in %s", args.indirname)
        infiles = pyani_files.get_fasta_files(args.indirname, recurse=True)
        print(f"infiles: {infiles}")
        logger.info("Input files:\n\t%s", "\n\t".join(infiles))

        # Are we subsampling? If so, make the selection here
        if args.subsample:
            infiles = subsample_input(infiles)
            logger.info("Sampled input files:\n\t%s", "\n\t".join(infiles))

        # Get lengths of input sequences
        logger.info("Processing input sequence lengths")
        org_lengths = pyani_files.get_sequence_lengths(infiles)
        logger.info("Sequence lengths:\n" + os.linesep.join(
            ["\t%s: %d" % (k, v) for k, v in list(org_lengths.items())]))

        # Run appropriate method on the contents of the input directory,

Example #16

0

Show file

File: average_nucleotide_identity.py Project: LongTianPy/pyani

        logger.error("ANI method %s not recognised (exiting)" % args.method)
        logger.error("Valid methods are: %s" % methods.keys())
        sys.exit(1)
    logger.info("Using ANI method: %s" % args.method)

    # Have we got a valid scheduler choice?
    schedulers = ["multiprocessing", "SGE"]
    if args.scheduler not in schedulers:
        logger.error("scheduler %s not recognised (exiting)" % args.scheduler)
        logger.error("Valid schedulers are: %s" % '; '.join(schedulers))
        sys.exit(1)
    logger.info("Using scheduler method: %s" % args.scheduler)

    # Get input files
    logger.info("Identifying FASTA files in %s" % args.indirname)
    infiles = pyani_files.get_fasta_files(args.indirname)
    logger.info("Input files:\n\t%s" % '\n\t'.join(infiles))

    # Get lengths of input sequences
    logger.info("Processing input sequence lengths")
    org_lengths = pyani_files.get_sequence_lengths(infiles)
    logger.info("Sequence lengths:\n" +
                os.linesep.join(["\t%s: %d" % (k, v) for
                                 k, v in org_lengths.items()]))

    # Run appropriate method on the contents of the input directory,
    # and write out corresponding results.
    logger.info("Carrying out %s analysis" % args.method)
    results = methods[args.method][0](infiles, org_lengths)
    write(results, methods[args.method][1])

Example #17

0

Show file

File: average_nucleotide_identity.py Project: baileythegreen/pyani

def run_main(argsin: Optional[Namespace] = None) -> int:
    """Drive the average_nucleotide_identity.py workflow from start to finish.

    Unless ``--rerender`` was requested, the FASTA files in the input
    directory are identified (and optionally subsampled), their sequence
    lengths are collected, the selected ANI method is run and its results
    are written out. Graphics are drawn afterwards when ``--graphics`` or
    ``--rerender`` is set.

    :param argsin:  Namespace, command-line arguments (handed on to
        process_arguments)
    :returns:  0
    """
    start_time = time.time()

    # Command line first, then logging
    args = process_arguments(argsin)
    logger = logging.getLogger(__name__)
    config_logger(args)

    # Validate the arguments and look up the requested method
    test_class_label_paths(args, logger)
    test_scheduler(args, logger)
    method_function, method_config = get_method(args)
    make_outdirs(args)

    if not args.rerender:
        # Locate the input sequences
        logger.info("Identifying FASTA files in %s", args.indirname)
        infiles = pyani_files.get_fasta_files(args.indirname)
        file_listing = "\n\t".join(str(path) for path in infiles)
        logger.info("Input files:\n\t%s", file_listing)

        # Optionally reduce the input to a subsample
        if args.subsample:
            infiles = subsample_input(args, logger, infiles)
            sampled_listing = "\n\t".join(str(path) for path in infiles)
            logger.info("Sampled input files:\n\t%s", sampled_listing)

        # Sequence lengths for every input
        logger.info("Processing input sequence lengths")
        org_lengths = pyani_files.get_sequence_lengths(infiles)
        length_lines = [
            "\t%s: %d" % (name, length)
            for name, length in org_lengths.items()
        ]
        logger.info("Sequence lengths:\n%s", os.linesep.join(length_lines))

        # Run the chosen method; TETRA is called with the input files only
        logger.info("Carrying out %s analysis", args.method)
        results = (
            method_function(infiles)
            if args.method == "TETRA"
            else method_function(args, infiles, org_lengths)
        )
        write(args, results)
    else:
        # Calculations are skipped; only graphics are produced below
        logger.warning(
            "--rerender option used. Producing graphics with no new recalculations"
        )

    # Graphical output, one pass per requested format
    if args.graphics or args.rerender:
        logger.info("Rendering output graphics")
        logger.info("Formats requested: %s", args.gformat)
        for gfmt in args.gformat.split(","):
            logger.info("Graphics format: %s", gfmt)
            logger.info("Graphics method: %s", args.gmethod)
            draw(args, method_config, gfmt)

    # Release any matplotlib figures that are still open
    plt.close("all")

    # Final report
    logger.info("Done: %s.", time.asctime())
    logger.info("Time taken: %.2fs", (time.time() - start_time))

    return 0