Beispiel #1
0
def main(argv):
    """Parse command-line options and run every stage of the fungap.py pipeline.

    After parsing, the output directory is created, the module-level loggers
    ``logger_time`` and ``logger_txt`` are (re)bound, and the stage helpers
    are called in a fixed order: ``check_inputs``, ``run_hisat2``,
    ``run_trinity``, ``run_repeat_modeler``, ``run_maker``, ``run_augustus``,
    ``run_braker1``, ``run_busco``, ``make_nr_prot``, ``run_blastp``,
    ``run_pfam_scan``, ``make_transcripts``/``run_blastn``, the ``import_*``
    loaders, ``catch_bad_genes``, ``filter_gff3s``, ``gff3_postprocess``,
    ``copy_output`` and ``create_markdown``.

    Args:
        argv: Not read by this function. ``parser.parse_args()`` is called
            without arguments, so options are taken from ``sys.argv``.
    """
    usage_text = (
        'fungap.py -g <genome_assembly> -12UA <trans_read_files> '
        '-o <output_dir> -a <augustus_species> '
        '-s <sister_proteome>')
    parser = ArgumentParser(usage=usage_text)
    add_option = parser.add_argument

    # Output location and transcriptome evidence (all optional)
    add_option(
        '-o', '--output_dir', nargs='?', default='fungap_out',
        help='Output directory (default: fungap_out)')
    add_option(
        '-1', '--trans_read_1', nargs='?', default='',
        help='Paired-end read1 "<prefix>_1.fastq"')
    add_option(
        '-2', '--trans_read_2', nargs='?', default='',
        help='Paired-end read2 "<prefix>_2.fastq"')
    add_option(
        '-U', '--trans_read_single', nargs='?', default='',
        help='Single read "<prefix>_s.fastq"')
    add_option(
        '-A', '--trans_bam', nargs='?', default='',
        help='BAM file (RNA-seq reads alignment to a genome assembly')

    # Mandatory inputs; nargs=1 means each value arrives as a 1-item list
    add_option(
        '-g', '--genome_assembly', nargs=1, required=True,
        help='Genome assembly file in FASTA format')
    add_option(
        '-a', '--augustus_species', nargs=1, required=True,
        help='AUGUSTUS species')
    add_option(
        '-s', '--sister_proteome', nargs=1, required=True,
        help='Sister proteome sequences in .faa')

    add_option(
        '-c', '--num_cores', nargs='?', default=1, type=int,
        help='Number of cores to be used (default: 1)')
    add_option(
        '-v', '--version', action='version',
        version='%(prog)s {}'.format(__version__))

    # Options for non-fungus genome
    add_option(
        '--no_braker_fungus', action='store_true',
        help='No --fungus flag in BRAKER for non-fungus genomes')
    add_option(
        '--no_jaccard_clip', action='store_true',
        help='No --jaccard_clip flag in Trinity for non-fungus genomes')
    add_option(
        '--no_genemark_fungus', action='store_true',
        help='No --fungus flag in GeneMark for non-fungus genomes')
    add_option(
        '-M', '--max_intron', nargs='?', default=2000, type=int,
        help='Max intron length (Default: 2000 bp)')

    args = parser.parse_args()

    # Only these three paths are made absolute; read/BAM paths are passed
    # through exactly as typed.
    output_dir = os.path.abspath(args.output_dir)
    genome_assembly = os.path.abspath(args.genome_assembly[0])
    sister_proteome = os.path.abspath(args.sister_proteome[0])
    augustus_species = args.augustus_species[0]
    reads_1 = args.trans_read_1
    reads_2 = args.trans_read_2
    reads_single = args.trans_read_single
    reads_bam = args.trans_bam
    num_cores = args.num_cores
    max_intron = args.max_intron

    # For non-fungus genomes: each --no_* switch blanks the flag that is
    # otherwise handed to the corresponding stage.
    braker_fungus_flag = '' if args.no_braker_fungus else '--fungus'
    jaccard_clip_flag = '' if args.no_jaccard_clip else '--jaccard_clip'
    genemark_fungus_flag = '' if args.no_genemark_fungus else '--gmes_fungus'

    # Create necessary dirs
    create_dir(output_dir)

    # Set logging (loggers are module globals so other functions can use them)
    global logger_time, logger_txt
    logger_time, logger_txt = set_logging(
        os.path.join(output_dir, 'logs', 'fungap.log'))
    logger_txt.debug(
        '\n============ New Run {} ============'.format(datetime.now()))

    # Run functions :) Slow is as good as Fast
    trans_read_files = check_inputs(
        reads_1, reads_2, reads_single, reads_bam,
        genome_assembly, sister_proteome)
    trans_bams = run_hisat2(
        genome_assembly, trans_read_files, output_dir, num_cores, max_intron)
    trinity_asms = run_trinity(
        trans_bams, output_dir, num_cores, jaccard_clip_flag, max_intron)
    repeat_model_file = run_repeat_modeler(
        genome_assembly, output_dir, num_cores)
    maker_gff3s, maker_faas = run_maker(
        genome_assembly, output_dir, augustus_species, sister_proteome,
        num_cores, repeat_model_file, trinity_asms, genemark_fungus_flag)

    # The masked assembly path is fixed under the Maker output directory
    masked_assembly = os.path.join(
        output_dir, 'maker_out', 'masked_assembly.fasta')

    # Run Augustus
    augustus_gff3, augustus_faa = run_augustus(
        masked_assembly, output_dir, augustus_species)

    # Run Braker1
    braker1_gff3s, braker1_faas = run_braker1(
        masked_assembly, trans_bams, output_dir, num_cores,
        braker_fungus_flag)

    # Run BUSCO on each set of gene models
    faa_files = [augustus_faa] + maker_faas + braker1_faas
    for protein_fasta in faa_files:
        run_busco(protein_fasta, output_dir, num_cores)
    busco_out_dir = os.path.join(output_dir, 'busco_out')

    # Get protein nr by removing identical proteins
    nr_prot_file, nr_prot_mapping_file = make_nr_prot(faa_files, output_dir)

    # Run BLASTp with nr prot file
    blastp_output = run_blastp(
        nr_prot_file, output_dir, sister_proteome, num_cores)

    # Run Pfam_scan with nr prot file
    pfam_scan_out = run_pfam_scan(nr_prot_file, output_dir, num_cores)

    # Concatenate all transcripts files with a shell `cat`
    trinity_asm = os.path.join(
        output_dir, 'gene_filtering', 'trinity_transcripts.fna')
    command = 'cat {} > {}'.format(' '.join(trinity_asms), trinity_asm)
    logger_time.debug('Create transcript')
    logger_txt.debug('[Run] {}'.format(command))
    os.system(command)

    # Extract transcripts per GFF3 and BLASTn them against the merged file
    gff3_files = [augustus_gff3] + maker_gff3s + braker1_gff3s
    blastn_out_files = [
        run_blastn(
            make_transcripts(genome_assembly, gff3), trinity_asm, output_dir)
        for gff3 in gff3_files
    ]

    # Import BLAST, BUSCO and Pfam score
    blastp_dict = import_blastp(blastp_output, nr_prot_mapping_file)
    busco_dict = import_busco(busco_out_dir, output_dir)
    pfam_dict = import_pfam(pfam_scan_out, nr_prot_mapping_file)
    blastn_dict = import_blastn(blastn_out_files, output_dir)

    # Catch bad genes
    bad_dict = catch_bad_genes(gff3_files, genome_assembly, output_dir)
    filter_gff3s(
        genome_assembly, gff3_files, blastp_dict, busco_dict, pfam_dict,
        blastn_dict, bad_dict, nr_prot_file, nr_prot_mapping_file, output_dir)
    gff3_postprocess(genome_assembly, output_dir)

    # Copy output files
    copy_output(output_dir)

    # Create markdown
    create_markdown(genome_assembly, output_dir, trans_bams, trinity_asms)
Beispiel #2
0
def getInput(player):
    """Prompt *player* until a valid move is entered and return that move.

    The prompt and the retry message are prefixed with the player's ``name``
    and ``number`` attributes. The entered text is lower-cased before it is
    validated, so matching is case-insensitive.

    Args:
        player: Object exposing ``name`` and ``number`` attributes.

    Returns:
        One of ``"rock"``, ``"paper"`` or ``"scissors"``.
    """
    prefix = "{} (Player {})".format(player.name, player.number)
    prompt = "{}: Please input 'Rock', 'Paper' or 'Scissors': ".format(prefix)

    # Loop until valid so the returned value is always the latest, validated
    # entry. A recursive retry whose result is dropped would hand the rejected
    # input back to the caller.
    while True:
        inp = input(prompt).lower()
        if inp in ("rock", "paper", "scissors"):
            return inp
        print("{}: Try again.".format(prefix))

# Main game loop: each pass reads one move per player, scores the round and
# then asks what to do next.
while True:
    p1_inp = getInput(player1)
    # NOTE(review): `sys` is called like a function here, so it is presumably
    # an alias for os.system that clears the console between the two players'
    # turns -- confirm against the file's imports.
    sys("cls")
    p2_inp = getInput(player2)

    winner = check_inputs(player1, player2, p1_inp, p2_inp)

    # Any result other than "p1"/"p2" (presumably a draw) leaves both
    # counters unchanged.
    if winner == "p1": player1.matches_won += 1
    elif winner == "p2": player2.matches_won += 1

    try_again = input("Do you want to try again or both player's statistics? Enter 'Yes' to try again, 'viewstats' to view your statistics and anything else to exit. ").lower()

    if try_again == "yes":
        print("Starting a new game!")
        continue
    elif try_again == "viewstats":
        # Elapsed play time: from the `started` timestamp (defined outside
        # this snippet) to the current value of unixtime(), broken into
        # components by relativedelta (presumably dateutil's -- verify).
        date1 = datetime.fromtimestamp(started)
        date2 = datetime.fromtimestamp(unixtime())
        rd = relativedelta(date2, date1)
        print("-== STATISTICS ==-\nPlayer 1 ({}): Won {} games.\n\nPlayer 2 ({}): Won {} games.\n\nPlayed for: {} years, {} months, {} days, {} hours, {} minutes and {} seconds.".format(player1.name, player1.matches_won, player2.name, player2.matches_won, rd.years, rd.months, rd.days, rd.hours, rd.minutes, rd.seconds))
        # NOTE(review): the visible snippet stops after this prompt; nothing
        # shown here acts on new_try_again, and no visible branch leaves the
        # loop.
        new_try_again = input("Do you want to go back to playing now, or exit? Enter 'Yes' to try again, and anything else to exit. ").lower()
Beispiel #3
0
from passengers import Passengers
from traxi import Traxi
from check_inputs import check_inputs


# Script entry point: run the input check once, then keep generating
# passengers and handing each one to the Traxi instance.
if __name__ == "__main__":
    # Called with no arguments; what is checked is defined in the
    # check_inputs module (presumably it aborts on bad input -- verify).
    check_inputs()

    p = Passengers()
    t = Traxi()
    # No exit condition is visible: the loop runs until the process is
    # interrupted or one of the calls raises.
    while True:
        new_passenger = p.generate()
        t.manage(new_passenger)
Beispiel #4
0
def main(argv):
    """Parse command-line options and run every stage of the fungap.py pipeline.

    This variant is written for Python 2 (it uses ``print`` statements).
    Mandatory options (-o, -p, -g, -a, -O, -s) are validated by hand after
    parsing: a missing one prints an ``[ERROR]`` line and exits with status 2.
    Optional tool locations (--with_*) are made absolute and passed to
    ``run_check_dependencies``, whose return value (``config_file``) is then
    handed to most of the stage helpers.

    Args:
        argv: Not read by this function. ``parser.parse_args()`` is called
            without arguments, so options are taken from ``sys.argv``.
    """
    argparse_usage = (
        'fungap.py -g <genome_assembly> -12UA <trans_read_files> '
        '-o <output_dir> -p <project_name> -a <augustus_species> '
        '-O <org_id> -s <sister_proteome>')
    parser = ArgumentParser(usage=argparse_usage)
    # NOTE(review): the help text mentions a default of 'fungap_out', but no
    # default= is set and a missing -o is treated as an error below.
    parser.add_argument("-o",
                        "--output_dir",
                        dest="output_dir",
                        nargs=1,
                        help="Output directory (default: 'fungap_out')")
    parser.add_argument(
        "-1",
        "--trans_read_1",
        dest="trans_read_1",
        nargs='?',
        help=('Paired-end read1 "<prefix>_1.fastq" <prefix> may not '
              'contain "_" character'))
    parser.add_argument(
        "-2",
        "--trans_read_2",
        dest="trans_read_2",
        nargs='?',
        help=('Paired-end read2 "<prefix>_2.fastq" <prefix> may not '
              'contain "_" character'))
    parser.add_argument(
        "-U",
        "--trans_read_single",
        dest="trans_read_single",
        nargs='?',
        help=('Single read "<prefix>_s.fastq" <prefix> may not '
              'contain "_" character'))
    parser.add_argument(
        "-A",
        "--trans_bam",
        dest="trans_bam",
        nargs='?',
        help='BAM file (RNA-seq reads alignment onto a genome assembly')
    # NOTE(review): as with -o, the advertised default ('project') is not
    # applied; a missing -p exits with an error below.
    parser.add_argument(
        "-p",
        "--project_name",
        dest="project_name",
        nargs=1,
        help="Project name without space. e.g. Mag (default: 'project')")
    parser.add_argument("-g",
                        "--genome_assembly",
                        dest="genome_assembly",
                        nargs=1,
                        help="Genome assembly file in FASTA format")
    parser.add_argument("-a",
                        "--augustus_species",
                        dest="augustus_species",
                        nargs=1,
                        help="AUGUSTUS species")
    # NOTE(review): the advertised default ('Gene') is not applied either; a
    # missing -O exits with an error below.
    parser.add_argument(
        "-O",
        "--org_id",
        dest="org_id",
        nargs=1,
        help=(
            "Organism ID. E.g. Hypma for Hypsizygus marmoreus (default: 'Gene')"
        ))
    parser.add_argument("-s",
                        "--sister_proteome",
                        dest="sister_proteome",
                        nargs=1,
                        help="Sister proteome sequences in .faa")
    # No type= is given, so a user-supplied core count is parsed as a string.
    parser.add_argument("-c",
                        "--num_cores",
                        dest="num_cores",
                        nargs=1,
                        help="Number of cores to be used (default: 1)")
    # Optional installation paths for the external tools
    parser.add_argument(
        "-H",
        "--with_hisat2",
        dest="with_hisat2",
        nargs='?',
        help="User-defined Hisat2 installation path (binary directory)")
    parser.add_argument(
        "-t",
        "--with_trinity",
        dest="with_trinity",
        nargs='?',
        help="User-defined Trinity installation path (binary directory)")
    parser.add_argument(
        "-m",
        "--with_maker",
        dest="with_maker",
        nargs='?',
        help="User-defined Maker installation path (binary directory)")
    parser.add_argument(
        "-R",
        "--with_repeat_modeler",
        dest="with_repeat_modeler",
        nargs='?',
        help="User-defined Repeat Modeler installation path (binary directory)"
    )
    parser.add_argument(
        "-b",
        "--with_braker1",
        dest="with_braker1",
        nargs='?',
        help="User-defined Braker1 installation path (binary directory)")
    parser.add_argument(
        "-B",
        "--with_busco",
        dest="with_busco",
        nargs='?',
        help="User-defined BUSCO installation path (binary directory)")
    parser.add_argument(
        "-i",
        "--with_interproscan",
        dest="with_interproscan",
        nargs='?',
        help="User-defined InterproScan installation path (binary directory)")
    parser.add_argument('-v',
                        '--version',
                        action='version',
                        version='%(prog)s {}'.format(__version__))

    # Options for non-fungus genome
    parser.add_argument(
        '--no_braker_fungus',
        dest='no_braker_fungus',
        action='store_true',
        help='No --fungus flag in BRAKER for non-fungus genomes')
    parser.add_argument(
        '--no_jaccard_clip',
        dest='no_jaccard_clip',
        action='store_true',
        help='No --jaccard_clip flag in Trinity for non-fungus genomes')
    parser.add_argument(
        '--no_genemark_fungus',
        dest='no_genemark_fungus',
        action='store_true',
        help='No --fungus flag in GeneMark for non-fungus genomes')
    parser.add_argument("-M",
                        "--max_intron",
                        dest="max_intron",
                        nargs='?',
                        help="Max intron length (Default: 2,000 bp)")

    args = parser.parse_args()

    # Mandatory options are checked by hand; nargs=1 values arrive as
    # one-element lists, hence the [0] indexing.
    if args.output_dir:
        output_dir = os.path.abspath(args.output_dir[0])
    else:
        print '[ERROR] Please provide OUTPUT DIRECTORY'
        sys.exit(2)

    # Optional read/BAM inputs: absolute path when given, '' otherwise.
    if args.trans_read_1:
        trans_read_1 = os.path.abspath(args.trans_read_1)
    else:
        trans_read_1 = ""

    if args.trans_read_2:
        trans_read_2 = os.path.abspath(args.trans_read_2)
    else:
        trans_read_2 = ""

    if args.trans_read_single:
        trans_read_single = os.path.abspath(args.trans_read_single)
    else:
        trans_read_single = ""

    if args.trans_bam:
        trans_bam = os.path.abspath(args.trans_bam)
    else:
        trans_bam = ""

    if args.project_name:
        project_name = args.project_name[0]
    else:
        print '[ERROR] Please provide PROJECT NAME'
        sys.exit(2)

    if args.genome_assembly:
        genome_assembly = os.path.abspath(args.genome_assembly[0])
    else:
        print '[ERROR] Please provide GENOME ASSEMBLY FILE'
        sys.exit(2)

    if args.augustus_species:
        augustus_species = args.augustus_species[0]
    else:
        print '[ERROR] Please provide AUGUSTUS SPECIES'
        sys.exit(2)

    if args.org_id:
        org_id = args.org_id[0]
    else:
        print '[ERROR] Please provide ORGANISM ID'
        sys.exit(2)

    if args.sister_proteome:
        sister_proteome = os.path.abspath(args.sister_proteome[0])
    else:
        print '[ERROR] Please provide SISTER PROTEOME FILE'
        sys.exit(2)

    # NOTE(review): a user-supplied value stays a string while the fallback
    # is the int 1 -- confirm the downstream helpers accept both types.
    if args.num_cores:
        num_cores = args.num_cores[0]
    else:
        num_cores = 1

    # Optional tool locations: absolute path when given, '' otherwise.
    if args.with_hisat2:
        with_hisat2 = os.path.abspath(args.with_hisat2)
    else:
        with_hisat2 = ''

    if args.with_trinity:
        with_trinity = os.path.abspath(args.with_trinity)
    else:
        with_trinity = ''

    if args.with_maker:
        with_maker = os.path.abspath(args.with_maker)
    else:
        with_maker = ''

    if args.with_repeat_modeler:
        with_repeat_modeler = os.path.abspath(args.with_repeat_modeler)
    else:
        with_repeat_modeler = ''

    if args.with_braker1:
        with_braker1 = os.path.abspath(args.with_braker1)
    else:
        with_braker1 = ''

    if args.with_busco:
        with_busco = os.path.abspath(args.with_busco)
    else:
        with_busco = ''

    if args.with_interproscan:
        with_interproscan = os.path.abspath(args.with_interproscan)
    else:
        with_interproscan = ''

    # For non-fungus genomes: each --no_* switch blanks the flag string that
    # is otherwise passed to the corresponding stage.
    if args.no_braker_fungus:
        no_braker_fungus = ''
    else:
        no_braker_fungus = '--fungus'

    if args.no_jaccard_clip:
        no_jaccard_clip = ''
    else:
        no_jaccard_clip = '--jaccard_clip'

    if args.no_genemark_fungus:
        no_genemark_fungus = ''
    else:
        no_genemark_fungus = '--gmes_fungus'

    # Unlike num_cores, max_intron is always converted to int.
    if args.max_intron:
        max_intron = int(args.max_intron)
    else:
        max_intron = 2000

    # Create necessary dirs
    create_dir(output_dir)

    # Set logging (loggers are module globals so other functions can use them)
    log_file = os.path.join(output_dir, 'logs', 'pipeline', 'fungap.log')
    global logger_time, logger_txt
    logger_time, logger_txt = set_logging(log_file)

    logger_txt.debug("\n============ New Run %s ============" %
                     (datetime.now()))

    # Run functions :) Slow is as good as Fast
    trans_read_files = check_inputs(trans_read_1, trans_read_2,
                                    trans_read_single, trans_bam,
                                    genome_assembly, sister_proteome)
    config_file = run_check_dependencies(output_dir, with_hisat2, with_trinity,
                                         with_maker, with_repeat_modeler,
                                         with_braker1, with_busco,
                                         with_interproscan)
    trans_bams = run_hisat2(genome_assembly, trans_read_files, output_dir,
                            num_cores, config_file, max_intron)
    trinity_asms = run_trinity(trans_bams, output_dir, project_name, num_cores,
                               config_file, no_jaccard_clip, max_intron)
    repeat_model_file = run_repeat_modeler(genome_assembly, output_dir,
                                           project_name, num_cores,
                                           config_file)
    maker_gff3s, maker_faas = run_maker(genome_assembly, output_dir,
                                        augustus_species, project_name,
                                        sister_proteome, num_cores,
                                        repeat_model_file, trinity_asms,
                                        config_file, no_genemark_fungus)
    # Get masked assembly (fixed path under the Maker output directory)
    masked_assembly = os.path.join(output_dir, 'gpre_maker',
                                   'masked_assembly.fasta')

    # Run Augustus
    augustus_gff3, augustus_faa = run_augustus(masked_assembly, output_dir,
                                               augustus_species)

    # Run Braker1
    braker1_gff3s, braker1_faas = run_braker1(masked_assembly, trans_bams,
                                              output_dir, num_cores,
                                              config_file, no_braker_fungus)

    # Run BUSCO on each gene models
    # glob() on a literal path is used here as an existence check.
    if not glob(os.path.join(output_dir, 'gpre_busco')):
        os.mkdir(os.path.join(output_dir, 'gpre_busco'))

    # Each protein file gets a BUSCO output path named after the part of its
    # basename that precedes the first '.'.
    for maker_faa in maker_faas:
        maker_prefix = os.path.basename(maker_faa).split('.')[0]
        maker_busco = os.path.join(output_dir, 'gpre_busco', maker_prefix)
        run_busco(maker_faa, maker_busco, num_cores, config_file)

    augustus_prefix = os.path.basename(augustus_faa).split('.')[0]
    augustus_busco = os.path.join(output_dir, 'gpre_busco', augustus_prefix)
    run_busco(augustus_faa, augustus_busco, num_cores, config_file)

    for braker1_faa in braker1_faas:
        braker1_prefix = os.path.basename(braker1_faa).split('.')[0]
        braker1_busco = os.path.join(output_dir, 'gpre_busco', braker1_prefix)
        run_busco(braker1_faa, braker1_busco, num_cores, config_file)

    busco_dir = os.path.join(output_dir, 'gpre_busco')

    # Get protein nr by removing identical proteins
    all_prot_files = maker_faas + [augustus_faa] + braker1_faas
    nr_prot_file, nr_prot_mapping_file = make_nr_prot(all_prot_files,
                                                      output_dir)

    # Run BLASTp with nr prot file
    blastp_output = run_blastp(nr_prot_file, output_dir, sister_proteome,
                               num_cores)

    # Run IPRscan with nr prot file
    ipr_output = run_iprscan(nr_prot_file, output_dir, config_file)

    # Get transcripts
    transcript_dir = os.path.join(output_dir, 'gpre_filtered', 'transcript')
    # NOTE(review): os.mkdir does not create parent directories; presumably
    # 'gpre_filtered' already exists by this point -- verify.
    if not os.path.exists(transcript_dir):
        os.mkdir(transcript_dir)
    # Merge all Trinity assemblies into one file with a shell `cat`; the exit
    # status of os.system is not checked.
    trinity_asm = os.path.join(transcript_dir, 'trinity_transcripts.fna')
    command = 'cat %s > %s' % (' '.join(trinity_asms), trinity_asm)
    logger_txt.debug('Create transcript')
    logger_txt.debug('[Run] %s' % (command))
    os.system(command)

    # Extract transcripts for every GFF3 and BLASTn them against the merged
    # Trinity file. augustus_prefix is reused from the BUSCO step above, so
    # it derives from the .faa file name rather than the GFF3 name.
    augustus_transcript = make_transcripts(genome_assembly, augustus_gff3,
                                           transcript_dir, augustus_prefix)
    run_blastn(augustus_transcript, trinity_asm, output_dir, augustus_prefix)
    for maker_gff3 in maker_gff3s:
        maker_prefix = os.path.basename(maker_gff3).split('.')[0]
        maker_transcript = make_transcripts(genome_assembly, maker_gff3,
                                            transcript_dir, maker_prefix)
        run_blastn(maker_transcript, trinity_asm, output_dir, maker_prefix)
    for braker1_gff3 in braker1_gff3s:
        braker1_prefix = os.path.basename(braker1_gff3).split('.')[0]
        braker1_transcript = make_transcripts(genome_assembly, braker1_gff3,
                                              transcript_dir, braker1_prefix)
        run_blastn(braker1_transcript, trinity_asm, output_dir, braker1_prefix)

    # Import BLAST, BUSCO and Pfam score
    blast_dict_score, blast_dict_evalue = import_blast(blastp_output,
                                                       nr_prot_mapping_file)
    busco_dict_score, busco_dict_list = import_busco(busco_dir)
    pfam_dict_score, pfam_dict_count = import_pfam(ipr_output,
                                                   nr_prot_mapping_file)
    blastn_dict = import_blastn(transcript_dir)

    # Catch bad genes
    D_bad_pickle = catch_bad_genes(maker_gff3s, augustus_gff3, braker1_gff3s,
                                   genome_assembly, output_dir)

    filter_gff3s(maker_gff3s, augustus_gff3, braker1_gff3s, blast_dict_score,
                 blast_dict_evalue, busco_dict_score, busco_dict_list,
                 pfam_dict_score, pfam_dict_count, blastn_dict, D_bad_pickle,
                 nr_prot_file, nr_prot_mapping_file, org_id, output_dir)

    # Copy output files
    copy_output(output_dir)

    # Create markdown
    create_markdown(genome_assembly, output_dir, trinity_asms)