Esempio n. 1
0
def main():
    """Run the TAP prediction command-line tool.

    Parses the command-line arguments, loads the peptides (either read
    directly from a peptide list, or generated from the proteins of a FASTA
    file), runs the selected TAP prediction method and writes the scores to
    a tab-separated output file.

    Output columns are ``Sequence``, then one ``Method``/``Score`` pair per
    column of the prediction result, and -- for FASTA input only -- a
    trailing ``Protein ID`` column holding the comma-separated transcript
    ids of the proteins each peptide originates from.

    Returns:
        0 on success, -1 if the input type is not recognised.
    """
    parser = argparse.ArgumentParser(
        description='Commandline tool for TAP prediction',
        )

    parser.add_argument('-m',
        '--method',
        type=str,
        choices=TAPPredictorFactory.available_methods().keys(),
        default="svmtap",
        help='The name of the prediction method'
        )

    parser.add_argument('-v',
        '--version',
        type=str,
        default="",
        help='The version of the prediction method'
        )

    parser.add_argument('-i',
        '--input',
        type=str,
        required=True,
        help='Path to the input file'
        )

    parser.add_argument('-t',
        '--type',
        choices=["fasta", "peptide"],
        type=str,
        default="fasta",
        help='The data type of the input (fasta, peptide list)'
        )

    parser.add_argument('-l',
        '--length',
        type=int,
        default=9,
        help='The length of peptides'
        )

    parser.add_argument('-op',
        '--options',
        type=str,
        default="",
        help="Additional options that get directly past to the tool"
    )

    parser.add_argument('-o',
        '--output',
        type=str,
        required=True,
        help='Path to the output file'
        )

    args = parser.parse_args()

    # Only FASTA input yields peptides that carry their source proteins,
    # so only then is the protein column written.
    from_fasta = args.type == "fasta"

    if from_fasta:
        # Peek at the first line to choose the id position handed to
        # read_fasta: 1 if the line contains a '|', otherwise 0.
        # NOTE(review): presumably this selects the '|'-separated header
        # field used as protein id -- confirm against read_fasta.
        with open(args.input, 'r') as f:
            first_line = f.readline()
        sep_pos = 1 if "|" in first_line else 0
        proteins = read_fasta(args.input, in_type=Protein, id_position=sep_pos)
        # args.length is already an int (argparse type=int).
        peptides = generate_peptides_from_proteins(proteins, args.length)
    elif args.type == "peptide":
        peptides = read_lines(args.input, in_type=Peptide)
    else:
        # Defensive: argparse's `choices` should already reject other values.
        sys.stderr.write('Input type not known\n')
        return -1

    # Only pass `version` when the user supplied one, so the factory can
    # apply its own default otherwise.
    if args.version == "":
        predictor = TAPPredictorFactory(args.method)
    else:
        predictor = TAPPredictorFactory(args.method, version=args.version)
    result = predictor.predict(peptides, options=args.options)

    # Write TSV: sequence, method, score, ... [, protein-id/transcript-id]
    with open(args.output, "w") as f:
        header = "Sequence\tMethod\tScore"
        if from_fasta:
            header += "\tProtein ID"
        f.write(header + "\n")

        for peptide, row in result.iterrows():
            fields = [str(peptide)]
            # Series.items() replaces Series.iteritems(), which was removed
            # in pandas 2.0.
            fields.extend("%s\t%.3f" % (method, score)
                          for method, score in row.items())
            if from_fasta:
                # Appended only for FASTA input so that data rows have the
                # same number of fields as the header (no trailing tab in
                # peptide mode).
                fields.append(",".join(prot.transcript_id
                                       for prot in peptide.get_all_proteins()))
            f.write("\t".join(fields) + "\n")
    return 0