Esempio n. 1
0
def main():
    """Run the ApoplastP command-line pipeline on a FASTA file.

    The function
      1. checks that the WEKA jar and the EMBOSS directory exist below the
         script directory,
      2. parses the command line with ``functions.scan_arguments``,
      3. runs EMBOSS ``pepstats`` on a copy of the input that uses short
         identifiers,
      4. writes a WEKA ARFF file and classifies it with the Random Forest
         model ``RATIO4_55_MODEL.model``,
      5. prints the predictions or writes them to the requested files.

    Intermediate files are kept in a temporary directory that is removed
    when the function finishes, including when it stops through
    ``sys.exit``.

    Raises:
        SystemExit: with status 1 when a required tool is missing, the FASTA
            file cannot be opened, or an external program fails.
    """
    SCRIPT_PATH = sys.path[0]
    # Change the path to WEKA to the appropriate location on your computer
    WEKA_PATH = SCRIPT_PATH + '/weka-3-8-1/weka.jar'
    PEPSTATS_PATH = SCRIPT_PATH + '/EMBOSS-6.5.7/emboss/'
    # -----------------------------------------------------------------------------------------------------------
    # Check that the path to the WEKA software exists
    if not os.access(WEKA_PATH, os.F_OK):
        print()
        print("Path to WEKA software does not exist!")
        print(
            "Check the installation and the given path to the WEKA software %s in ApoplastP.py (line 40)."
            % WEKA_PATH)
        print()
        sys.exit(1)
    # -----------------------------------------------------------------------------------------------------------
    # Check that the path to the EMBOSS software exists for pepstats
    if not os.access(PEPSTATS_PATH, os.F_OK):
        print()
        print("Path to EMBOSS software does not exist!")
        print(
            "Check the installation and the given path to the EMBOSS software %s in ApoplastP.py (line 41)."
            % PEPSTATS_PATH)
        print()
        sys.exit(1)
    # -----------------------------------------------------------------------------------------------------------
    commandline = sys.argv[1:]
    if not commandline:
        functions.usage()
        # usage() is expected to terminate the program (TODO confirm); exit
        # explicitly so the code below never runs without parsed arguments.
        sys.exit(1)
    (FASTA_FILE, short_format, output_file, apoplast_output,
     nonapoplast_output, prob_threshold) = functions.scan_arguments(commandline)
    # If no FASTA file was provided with the -i option
    if not FASTA_FILE:
        print()
        print('Please specify a FASTA input file using the -i option!')
        functions.usage()
        sys.exit(1)
    # -----------------------------------------------------------------------------------------------------------
    # Check that the FASTA file can be opened before any temporary files are
    # created; the context manager closes the probe handle again.
    try:
        with open(FASTA_FILE, 'r'):
            pass
    except OSError as e:
        # Does not exist OR no read permissions
        print("Unable to open FASTA file:", FASTA_FILE)
        print("I/O error({0}): {1}".format(e.errno, e.strerror))
        sys.exit(1)
    # -----------------------------------------------------------------------------------------------------------
    # Temporary folder that will be used to store intermediate results.
    # mkdtemp() already creates the directory, so no separate mkdir is needed.
    RESULTS_PATH = tempfile.mkdtemp() + '/'
    try:
        # -------------------------------------------------------------------------------------------------------
        # Extract the identifiers and sequences from input FASTA file
        ORIGINAL_IDENTIFIERS, SEQUENCES = functions.get_seqs_ids_fasta(
            FASTA_FILE)
        SEQUENCES = [seq.upper() for seq in SEQUENCES]
        # -------------------------------------------------------------------------------------------------------
        print('-----------------')
        print()
        print("ApoplastP is running for", len(ORIGINAL_IDENTIFIERS),
              "proteins given in FASTA file", FASTA_FILE)
        print()
        print('-----------------')
        # -------------------------------------------------------------------------------------------------------
        # Replace ambiguous amino acids for ProtParam
        ORIGINAL_SEQUENCES = SEQUENCES
        SEQUENCES = functions.filterX(SEQUENCES)
        # -------------------------------------------------------------------------------------------------------
        # Write new FASTA file with short identifiers because pepstats can't handle long names
        short_fasta = RESULTS_PATH + 'short_ids.fasta'
        SHORT_IDENTIFIERS = functions.write_FASTA_short_ids(
            short_fasta, ORIGINAL_IDENTIFIERS, SEQUENCES)
        # -------------------------------------------------------------------------------------------------------
        # Call pepstats
        pepstats_output = RESULTS_PATH + 'pepstats.out'
        pepstats_cmd = [
            PEPSTATS_PATH + 'pepstats', '-sequence', short_fasta, '-outfile',
            pepstats_output
        ]
        try:
            returncode = subprocess.call(pepstats_cmd, shell=False)
        except (OSError, subprocess.SubprocessError) as e:
            # e.g. the pepstats executable is missing or not executable
            print("Error calling pepstats: %s" % e)
            sys.exit(1)
        if returncode:
            print("Error calling pepstats: Calling pepstats returned %s" %
                  returncode)
            sys.exit(1)
        # -------------------------------------------------------------------------------------------------------
        # Parse pepstats file
        pepstats_dic = functions.pepstats(SHORT_IDENTIFIERS, SEQUENCES,
                                          pepstats_output)
        # -------------------------------------------------------------------------------------------------------
        # Write the WEKA arff file for classification of the input FASTA file
        weka_input = RESULTS_PATH + 'weka.arff'
        functions.write_weka_input(weka_input, SHORT_IDENTIFIERS, SEQUENCES,
                                   pepstats_dic)
        # -------------------------------------------------------------------------------------------------------
        # Call WEKA Random Forest model for classification of input FASTA file
        weka_cmd = [
            'java', '-cp', WEKA_PATH, 'weka.classifiers.trees.RandomForest',
            '-l', SCRIPT_PATH + '/RATIO4_55_MODEL.model', '-T', weka_input,
            '-p', 'first-last'
        ]
        weka_output = RESULTS_PATH + 'APOPLAST_Predictions.txt'
        # WEKA's stdout is redirected straight into the predictions file
        with open(weka_output, 'wb') as out:
            try:
                returncode = subprocess.call(weka_cmd, shell=False, stdout=out)
            except (OSError, subprocess.SubprocessError) as e:
                print("Error calling WEKA: %s" % e)
                sys.exit(1)
        if returncode:
            print("Error calling WEKA: Calling WEKA returned %s" % returncode)
            sys.exit(1)
        # -------------------------------------------------------------------------------------------------------
        # Parse the WEKA output file
        predicted_apoplast, predictions = functions.parse_weka_output(
            weka_output, ORIGINAL_IDENTIFIERS, ORIGINAL_SEQUENCES,
            prob_threshold)
        # -------------------------------------------------------------------------------------------------------
        # Short format: predictions for all proteins as tab-delimited table.
        # Long format: additionally output more information and stats.
        if output_file:
            # Text mode: the same values are handed to print() below, so they
            # are presumably str, which a binary-mode file would reject.
            with open(output_file, 'w') as out:
                out.writelines(functions.short_output(predictions))
                if not short_format:
                    out.writelines(
                        functions.long_output(ORIGINAL_IDENTIFIERS,
                                              predicted_apoplast))
            print('ApoplastP results were saved to output file:', output_file)
        else:
            print(functions.short_output(predictions))
            if not short_format:
                print(
                    functions.long_output(ORIGINAL_IDENTIFIERS,
                                          predicted_apoplast))
        # -------------------------------------------------------------------------------------------------------
        # If the user additionally wants to save the predicted apoplastic proteins in a provided FASTA file
        if apoplast_output:
            with open(apoplast_output, 'w') as fasta_out:
                for apoplast, prob, sequence in predicted_apoplast:
                    fasta_out.write('>' + apoplast +
                                    ' | Apoplast probability: ' + str(prob) +
                                    '\n')
                    fasta_out.write(sequence + '\n')
        if nonapoplast_output:
            with open(nonapoplast_output, 'w') as fasta_out:
                for apoplast, pred, prob, sequence in predictions:
                    if pred == 'Non-apoplastic':
                        fasta_out.write('>' + apoplast +
                                        ' | Non-apoplast probability: ' +
                                        str(prob) + '\n')
                        fasta_out.write(sequence + '\n')
    finally:
        # Clean up and delete temporary folder that was created, also when the
        # pipeline stops early through sys.exit() or an exception.
        shutil.rmtree(RESULTS_PATH)
    # -----------------------------------------------------------------------------------------------------------
    # Best-effort close of the standard streams; failures are deliberately
    # ignored, as in the original code.
    for stream in (sys.stdout, sys.stderr):
        try:
            stream.close()
        except Exception:
            pass
Esempio n. 2
0
def main():
    """Run the EffectorP command-line pipeline on a FASTA file.

    The function
      1. checks that the WEKA jar and the EMBOSS directory exist below the
         script directory,
      2. parses the command line with ``functions.scan_arguments``,
      3. runs EMBOSS ``pepstats`` on a copy of the input that uses short
         identifiers,
      4. writes a WEKA ARFF file and classifies it with the Naive Bayes model,
      5. prints the predictions or writes them to the requested files.

    Intermediate files are kept in ``<script dir>/tmp/<uuid>/``; that folder
    is removed when the function finishes, including when it stops through
    ``sys.exit``.

    Raises:
        SystemExit: with status 1 when a required tool is missing, the FASTA
            file cannot be opened, or an external program fails.
    """
    # -----------------------------------------------------------------------------------------------------------
    SCRIPT_PATH = sys.path[0]
    # Change the path to WEKA to the appropriate location on your computer
    WEKA_PATH = SCRIPT_PATH + '/weka-3-6-12/weka.jar'
    PEPSTATS_PATH = SCRIPT_PATH + '/EMBOSS-6.5.7/emboss/'
    # -----------------------------------------------------------------------------------------------------------
    # Check that the path to the WEKA software exists
    if not os.access(WEKA_PATH, os.F_OK):
        print()
        print("Path to WEKA software does not exist!")
        print("Check the installation and the given path to the WEKA software %s in EffectorP.py (line 47)." % WEKA_PATH)
        print()
        sys.exit(1)
    # -----------------------------------------------------------------------------------------------------------
    # Check that the path to the EMBOSS software exists for pepstats
    if not os.access(PEPSTATS_PATH, os.F_OK):
        print()
        print("Path to EMBOSS software does not exist!")
        print("Check the installation and the given path to the EMBOSS software %s in EffectorP.py (line 48)." % PEPSTATS_PATH)
        print()
        sys.exit(1)
    # -----------------------------------------------------------------------------------------------------------
    commandline = sys.argv[1:]
    if not commandline:
        functions.usage()
        # usage() is expected to terminate the program (TODO confirm); exit
        # explicitly so the code below never runs without parsed arguments.
        sys.exit(1)
    FASTA_FILE, short_format, output_file, effector_output = functions.scan_arguments(commandline)
    # If no FASTA file was provided with the -i option
    if not FASTA_FILE:
        print()
        print('Please specify a FASTA input file using the -i option!')
        functions.usage()
        sys.exit(1)
    # -----------------------------------------------------------------------------------------------------------
    # Check that the FASTA file can be opened before any temporary files are
    # created; the context manager closes the probe handle again.
    try:
        with open(FASTA_FILE, 'r'):
            pass
    except OSError as e:
        # Does not exist OR no read permissions
        print("Unable to open FASTA file:", FASTA_FILE)
        print("I/O error({0}): {1}".format(e.errno, e.strerror))
        sys.exit(1)
    # -----------------------------------------------------------------------------------------------------------
    # Temporary folder name identifier that will be used to store results
    FOLDER_IDENTIFIER = str(uuid.uuid4())
    # Path to temporary results folder; exist_ok avoids the race between an
    # existence check and the creation of the shared tmp/ parent.
    RESULTS_PATH = SCRIPT_PATH + '/tmp/' + FOLDER_IDENTIFIER + '/'
    os.makedirs(RESULTS_PATH, exist_ok=True)
    try:
        # -------------------------------------------------------------------------------------------------------
        # Extract the identifiers and sequences from input FASTA file
        ORIGINAL_IDENTIFIERS, SEQUENCES = functions.get_seqs_ids_fasta(FASTA_FILE)
        # -------------------------------------------------------------------------------------------------------
        print('-----------------')
        print()
        print("EffectorP is running for", len(ORIGINAL_IDENTIFIERS), "proteins given in FASTA file", FASTA_FILE)
        print()
        # -------------------------------------------------------------------------------------------------------
        # Write new FASTA file with short identifiers because pepstats can't handle long names
        short_fasta = RESULTS_PATH + FOLDER_IDENTIFIER + '_short_ids.fasta'
        SHORT_IDENTIFIERS = functions.write_FASTA_short_ids(short_fasta, ORIGINAL_IDENTIFIERS, SEQUENCES)
        # -------------------------------------------------------------------------------------------------------
        # Call pepstats
        print('Call pepstats...')
        pepstats_output = RESULTS_PATH + FOLDER_IDENTIFIER + '.pepstats'
        pepstats_cmd = [PEPSTATS_PATH + 'pepstats', '-sequence', short_fasta,
                        '-outfile', pepstats_output]
        try:
            returncode = subprocess.call(pepstats_cmd, shell=False)
        except (OSError, subprocess.SubprocessError) as e:
            # e.g. the pepstats executable is missing or not executable
            print("Error calling pepstats: %s" % e)
            sys.exit(1)
        if returncode:
            # Exit with a non-zero status so callers can detect the failure
            print("Error calling pepstats: Calling pepstats returned %s" % returncode)
            sys.exit(1)
        print('Done.')
        print()
        # -------------------------------------------------------------------------------------------------------
        # Parse pepstats file
        print('Scan pepstats file')
        pepstats_dic = functions.pepstats(SHORT_IDENTIFIERS, SEQUENCES, pepstats_output)
        print('Done.')
        print()
        # -------------------------------------------------------------------------------------------------------
        # Write the WEKA arff file for classification of the input FASTA file
        weka_input = RESULTS_PATH + FOLDER_IDENTIFIER + '.arff'
        functions.write_weka_input(weka_input, SHORT_IDENTIFIERS, pepstats_dic)
        # -------------------------------------------------------------------------------------------------------
        # Call WEKA Naive Bayes model for classification of input FASTA file
        print('Start classification with EffectorP...')
        weka_cmd = ['java', '-cp', WEKA_PATH, 'weka.classifiers.bayes.NaiveBayes', '-l',
                    SCRIPT_PATH + '/trainingdata_samegenomes_iteration15_ratio3_bayes.model',
                    '-T', weka_input, '-p', 'first-last']
        weka_output = RESULTS_PATH + FOLDER_IDENTIFIER + '_Predictions.txt'
        # WEKA's stdout is redirected straight into the predictions file
        with open(weka_output, 'wb') as out:
            try:
                returncode = subprocess.call(weka_cmd, shell=False, stdout=out)
            except (OSError, subprocess.SubprocessError) as e:
                print("Error calling WEKA: %s" % e)
                sys.exit(1)
        if returncode:
            print("Error calling WEKA: Calling WEKA returned %s" % returncode)
            sys.exit(1)
        print('Done.')
        print()
        print('-----------------')
        # -------------------------------------------------------------------------------------------------------
        # Parse the WEKA output file
        predicted_effectors, predictions = functions.parse_weka_output(weka_output, ORIGINAL_IDENTIFIERS, SEQUENCES)
        # -------------------------------------------------------------------------------------------------------
        # Short format: predictions for all proteins as tab-delimited table.
        # Long format: additionally output more information and stats.
        if output_file:
            # Text mode: the same values are handed to print() below, so they
            # are presumably str, which a binary-mode file would reject.
            with open(output_file, 'w') as out:
                out.writelines(functions.short_output(predictions))
                if not short_format:
                    out.writelines(functions.long_output(ORIGINAL_IDENTIFIERS, predicted_effectors))
            print('EffectorP results were saved to output file:', output_file)
        else:
            print(functions.short_output(predictions))
            if not short_format:
                print(functions.long_output(ORIGINAL_IDENTIFIERS, predicted_effectors))
        # -------------------------------------------------------------------------------------------------------
        # If the user additionally wants to save the predicted effectors in a provided FASTA file
        if effector_output:
            with open(effector_output, 'w') as fasta_out:
                for effector, prob, sequence in predicted_effectors:
                    fasta_out.write('>' + effector + ' | Effector probability: ' + str(prob) + '\n')
                    fasta_out.write(sequence + '\n')
    finally:
        # Clean up and delete temporary folder that was created, also when the
        # pipeline stops early through sys.exit() or an exception.
        shutil.rmtree(RESULTS_PATH)
Esempio n. 3
0
# Read every record of the input FASTA file. The context manager closes the
# file handle once the parser has been consumed.
with open(FASTA_FILE, 'r') as fasta_handle:
    for identifier, sequence in functions.SimpleFastaParser(fasta_handle):
        ORIGINAL_IDENTIFIERS.append(identifier)
        # Remove '*' characters from the sequence before it is stored
        SEQUENCES.append(sequence.replace('*', ''))

SEQUENCES = [seq.upper() for seq in SEQUENCES]
# -----------------------------------------------------------------------------------------------------------
print('-----------------')
print()
print("EffectorP 3.0 is running for", len(ORIGINAL_IDENTIFIERS),
      "proteins given in FASTA file", FASTA_FILE)
print()
# -----------------------------------------------------------------------------------------------------------
# Write new FASTA file with short identifiers so WEKA has safe input
f_output = RESULTS_PATH + 'short_ids.fasta'
SHORT_IDENTIFIERS = functions.write_FASTA_short_ids(f_output,
                                                    ORIGINAL_IDENTIFIERS,
                                                    SEQUENCES)
# -----------------------------------------------------------------------------------------------------------
# Write the WEKA arff file for classification of the input FASTA file
weka_input = RESULTS_PATH + 'weka.arff'
functions.write_weka_input(weka_input, SHORT_IDENTIFIERS, SEQUENCES)
# -----------------------------------------------------------------------------------------------------------
# Ensemble averaging approach, use seq0,seq1,seq2... as keys in case there are duplicate FASTA identifiers
ensembl_votes_cytoplasmic = {}
ensembl_votes_apoplastic = {}
# -----------------------------------------------------------------------------------------------------------
# Call WEKA models for classification of input FASTA file
# -----------------------------------------------------------------------------------------------------------
print('Ensemble classification')

if FUNGAL_MODE == True:
Esempio n. 4
0
def main():
    """Run the EffectorP 2.0 command-line pipeline on a FASTA file.

    The function
      1. checks that the WEKA jar and the EMBOSS directory exist below the
         script directory,
      2. parses the command line with ``functions.scan_arguments``,
      3. runs EMBOSS ``pepstats`` on a copy of the input that uses short
         identifiers,
      4. classifies the resulting ARFF file with every model listed in
         ``functions.models_bayes`` (Naive Bayes) and ``functions.models_J48``
         (J48),
      5. averages the per-model probabilities (soft voting) and labels each
         protein 'Effector' (averaged probability above 0.55),
         'Unlikely effector' or 'Non-effector',
      6. prints the predictions or writes them to the requested files.

    Intermediate files are kept in a temporary directory that is removed
    when the function finishes, including when it stops through
    ``sys.exit``.

    Raises:
        SystemExit: with status 1 when a required tool is missing, the FASTA
            file cannot be opened, or an external program fails.
    """
    # -----------------------------------------------------------------------------------------------------------
    SCRIPT_PATH = sys.path[0]
    # Change the path to WEKA to the appropriate location on your computer
    WEKA_PATH = SCRIPT_PATH + '/weka-3-8-1/weka.jar'
    PEPSTATS_PATH = SCRIPT_PATH + '/EMBOSS-6.5.7/emboss/'
    # -----------------------------------------------------------------------------------------------------------
    # Check that the path to the WEKA software exists
    if not os.access(WEKA_PATH, os.F_OK):
        print()
        print("Path to WEKA software does not exist!")
        print("Check the installation and the given path to the WEKA software %s in EffectorP.py (line 47)." % WEKA_PATH)
        print()
        sys.exit(1)
    # -----------------------------------------------------------------------------------------------------------
    # Check that the path to the EMBOSS software exists for pepstats
    if not os.access(PEPSTATS_PATH, os.F_OK):
        print()
        print("Path to EMBOSS software does not exist!")
        print("Check the installation and the given path to the EMBOSS software %s in EffectorP.py (line 48)." % PEPSTATS_PATH)
        print()
        sys.exit(1)
    # -----------------------------------------------------------------------------------------------------------
    commandline = sys.argv[1:]
    if not commandline:
        functions.usage()
        # usage() is expected to terminate the program (TODO confirm); exit
        # explicitly so the code below never runs without parsed arguments.
        sys.exit(1)
    (FASTA_FILE, short_format, output_file, effector_output,
     noneffector_output) = functions.scan_arguments(commandline)
    # If no FASTA file was provided with the -i option
    if not FASTA_FILE:
        print()
        print('Please specify a FASTA input file using the -i option!')
        functions.usage()
        sys.exit(1)
    # -----------------------------------------------------------------------------------------------------------
    # Check that the FASTA file can be opened before any temporary files are
    # created; the context manager closes the probe handle again.
    try:
        with open(FASTA_FILE, 'r'):
            pass
    except OSError as e:
        # Does not exist OR no read permissions
        print("Unable to open FASTA file:", FASTA_FILE)
        print("I/O error({0}): {1}".format(e.errno, e.strerror))
        sys.exit(1)
    # -----------------------------------------------------------------------------------------------------------
    # Temporary folder that will be used to store intermediate results.
    # mkdtemp() already creates the directory, so no separate mkdir is needed.
    RESULTS_PATH = tempfile.mkdtemp() + '/'
    try:
        # -------------------------------------------------------------------------------------------------------
        # Extract the identifiers and sequences from input FASTA file
        ORIGINAL_IDENTIFIERS, SEQUENCES = functions.get_seqs_ids_fasta(FASTA_FILE)
        SEQUENCES = [seq.upper() for seq in SEQUENCES]
        # -------------------------------------------------------------------------------------------------------
        print('-----------------')
        print()
        print("EffectorP 2.0 is running for", len(ORIGINAL_IDENTIFIERS), "proteins given in FASTA file", FASTA_FILE)
        print()
        # -------------------------------------------------------------------------------------------------------
        # Write new FASTA file with short identifiers because pepstats can't handle long names
        short_fasta = RESULTS_PATH + 'short_ids.fasta'
        SHORT_IDENTIFIERS = functions.write_FASTA_short_ids(short_fasta, ORIGINAL_IDENTIFIERS, SEQUENCES)
        # -------------------------------------------------------------------------------------------------------
        # Call pepstats
        print('Call pepstats...')
        pepstats_output = RESULTS_PATH + 'pepstats.out'
        pepstats_cmd = [PEPSTATS_PATH + 'pepstats', '-sequence', short_fasta,
                        '-outfile', pepstats_output]
        try:
            returncode = subprocess.call(pepstats_cmd, shell=False)
        except (OSError, subprocess.SubprocessError) as e:
            # e.g. the pepstats executable is missing or not executable
            print("Error calling pepstats: %s" % e)
            sys.exit(1)
        if returncode:
            # Exit with a non-zero status so callers can detect the failure
            print("Error calling pepstats: Calling pepstats returned %s" % returncode)
            sys.exit(1)
        print('Done.')
        print()
        # -------------------------------------------------------------------------------------------------------
        # Parse pepstats file
        print('Scan pepstats file')
        pepstats_dic = functions.pepstats(SHORT_IDENTIFIERS, SEQUENCES, pepstats_output)
        print('Done.')
        print()
        # -------------------------------------------------------------------------------------------------------
        # Write the WEKA arff file for classification of the input FASTA file
        weka_input = RESULTS_PATH + 'weka.arff'
        functions.write_weka_input(weka_input, SHORT_IDENTIFIERS, pepstats_dic)
        # -------------------------------------------------------------------------------------------------------
        # Ensemble averaging approach, use seq0,seq1,seq2... as keys in case there are duplicate FASTA identifiers
        ensembl_votes = {}
        # -------------------------------------------------------------------------------------------------------
        # Call WEKA models for classification of input FASTA file
        models = functions.models_bayes + functions.models_J48
        # Each WEKA classifier class is paired with the model files it loads;
        # all Naive Bayes models run first, then all J48 models.
        classifier_models = (
            ('weka.classifiers.bayes.NaiveBayes', functions.models_bayes),
            ('weka.classifiers.trees.J48', functions.models_J48),
        )
        weka_output = RESULTS_PATH + 'Predictions.txt'
        print('Ensemble classification')
        for classifier, model_paths in classifier_models:
            for model in model_paths:
                weka_cmd = ['java', '-cp', WEKA_PATH, classifier, '-l', SCRIPT_PATH + model,
                            '-T', weka_input, '-p', 'first-last']
                # WEKA's stdout is redirected straight into the predictions
                # file, which is overwritten for every model.
                with open(weka_output, 'wb') as out:
                    try:
                        returncode = subprocess.call(weka_cmd, shell=False, stdout=out)
                    except (OSError, subprocess.SubprocessError) as e:
                        print("Error calling WEKA: %s" % e)
                        sys.exit(1)
                if returncode:
                    print("Error calling WEKA: Calling WEKA returned %s" % returncode)
                    sys.exit(1)
                # Parse the WEKA output file; only the per-protein list is
                # needed here, the effector lists are rebuilt after voting.
                _, _, predictions = functions.parse_weka_output(weka_output, ORIGINAL_IDENTIFIERS, SEQUENCES)
                for index, (_, prediction, prob, _) in enumerate(predictions):
                    ensembl_votes.setdefault('seq' + str(index), []).append((prediction, prob))
        print('Done.')
        print()
        # -------------------------------------------------------------------------------------------------------
        # Soft voting
        # -------------------------------------------------------------------------------------------------------
        ensemble_predictions, predicted_effectors, predicted_noneffectors, predicted_weakeffectors = [], [], [], []
        # `predictions` from the last model supplies identifiers and sequences
        for index, (ident, _, _, seq) in enumerate(predictions):
            yes_votes, no_votes = [], []
            for vote, vote_prob in ensembl_votes['seq' + str(index)]:
                # Each vote contributes its probability to its own class and
                # the complement to the other class.
                if vote == 'Non-effector':
                    no_votes.append(vote_prob)
                    yes_votes.append(1.0 - vote_prob)
                elif vote == 'Effector':
                    yes_votes.append(vote_prob)
                    no_votes.append(1.0 - vote_prob)

            # Average over all models and round to three decimals
            yes_prob = round(sum(yes_votes) / float(len(models)), 3)
            no_prob = round(sum(no_votes) / float(len(models)), 3)

            if yes_prob > no_prob:
                prob = yes_prob
                if yes_prob > 0.55:
                    prediction = 'Effector'
                    predicted_effectors.append((ident, prob, seq))
                else:
                    prediction = 'Unlikely effector'
                    predicted_weakeffectors.append((ident, prob, seq))
            else:
                prediction = 'Non-effector'
                prob = no_prob
                predicted_noneffectors.append((ident, prob, seq))

            ensemble_predictions.append((ident, prediction, prob, seq))
        # -------------------------------------------------------------------------------------------------------
        # Short format: predictions for all proteins as tab-delimited table.
        # Long format: additionally output more information and stats.
        if output_file:
            # Text mode: the same values are handed to print() below, so they
            # are presumably str, which a binary-mode file would reject.
            with open(output_file, 'w') as out:
                out.writelines(functions.short_output(ensemble_predictions))
                if not short_format:
                    out.writelines(functions.long_output(ORIGINAL_IDENTIFIERS, predicted_effectors))
            print('EffectorP results were saved to output file:', output_file)
        else:
            print(functions.short_output(ensemble_predictions))
            if not short_format:
                print(functions.long_output(ORIGINAL_IDENTIFIERS, predicted_effectors))
        # -------------------------------------------------------------------------------------------------------
        # If the user additionally wants to save the predicted effectors in a provided FASTA file
        if effector_output:
            with open(effector_output, 'w') as fasta_out:
                for effector, prob, sequence in predicted_effectors:
                    fasta_out.write('>' + effector + ' | Effector probability: ' + str(prob) + '\n')
                    fasta_out.write(sequence + '\n')
        if noneffector_output:
            with open(noneffector_output, 'w') as fasta_out:
                for effector, prob, sequence in predicted_noneffectors:
                    fasta_out.write('>' + effector + ' | Non-effector probability: ' + str(prob) + '\n')
                    fasta_out.write(sequence + '\n')
                for effector, prob, sequence in predicted_weakeffectors:
                    fasta_out.write('>' + effector + ' | Unlikely effector probability: ' + str(prob) + '\n')
                    fasta_out.write(sequence + '\n')
    finally:
        # Clean up and delete temporary folder that was created, also when the
        # pipeline stops early through sys.exit() or an exception.
        shutil.rmtree(RESULTS_PATH)
    # -----------------------------------------------------------------------------------------------------------
    # Best-effort close of the standard streams; failures are deliberately
    # ignored, as in the original code.
    for stream in (sys.stdout, sys.stderr):
        try:
            stream.close()
        except Exception:
            pass