def get_params():
    """
    Gets the params dictionary that holds all the configuration
    information of the program.

    The dictionary is loaded from 'inmembrane.config' in the current
    working directory. If 'inmembrane.config' is not found there, a
    default one is first written from 'default_params_str' and then
    loaded like any existing config.

    The config file should be edited if the binaries are not available
    on the path, or have different names.

    Returns whatever object the text of the config file evaluates to
    (presumably a dict of parameters - the file content is not visible
    from this function).
    """
    from helpers import log_stderr

    config = os.path.join(os.getcwd(), 'inmembrane.config')
    if not os.path.isfile(config):
        log_stderr("# Couldn't find inmembrane.config file")
        log_stderr("# So, will generate a default config " + config)
        # NOTE(review): not referenced by name below. Kept because the
        # eval() call further down runs with this function's locals in
        # scope, so the generated config text could in principle refer
        # to it - confirm against default_params_str before removing.
        abs_hmm_profiles = os.path.join(module_dir, 'hmm_profiles')
        # 'with' guarantees the handle is closed even if write() fails
        with open(config, 'w') as fh:
            fh.write(default_params_str)
    else:
        log_stderr("# Loading existing inmembrane.config")

    # SECURITY NOTE: eval() executes arbitrary Python found in the
    # config file. Only run with a config file you trust.
    with open(config) as fh:
        params = eval(fh.read())
    return params
def process(params): """ Main program loop. Triggers the 'protocol' found in the params to annotate all proteins give the list of annotations needed by 'protocol'. Then outputs to screen and a .csv file. """ from helpers import dict_get, create_proteins_dict, log_stdout, log_stderr # will load all plugins in the plugins/ directory from inmembrane.plugins import * # initializations exec(import_protocol_python(params)) init_output_dir(params) seqids, proteins = create_proteins_dict(params['fasta']) # TODO: ideally this loop needs to be run within the protocol, # since for some protocols not all plugins # will be run for every sequence, conditional # on the outcome of a previous analysis # eg. protocol.run(params, proteins) # annotates with external binaries as found in plugins for plugin_str in protocol.get_annotations(params): plugin = eval(plugin_str) plugin.annotate(params, proteins) # do protocol analysis on the results of the annotations for seqid in seqids: protein = proteins[seqid] protocol.post_process_protein(params, protein) log_stdout(protocol.protein_output_line(seqid, proteins)) # print a summary table of classifications to stderr log_stderr(protocol.summary_table(params, proteins)) # always write to biologist-friendly csv file f = open(params['csv'], 'w') for seqid in seqids: f.write(protocol.protein_csv_line(seqid, proteins)) f.close() log_stderr("\n") log_stderr("Output written to %s" % (params['csv'])) # TODO: citations for specific HMMs (PFAM etc ?) # write citations to a file and gracefully deal with plugins # without a citation defined import codecs import textwrap f = codecs.open(params['citations'], mode='w', encoding='utf-8') programs_used = [] for program in protocol.get_annotations(params): plugin = eval(program) try: f.write(plugin.citation['name']+":\n") f.write(textwrap.fill(plugin.citation['ref'])) except AttributeError: f.write("%s - no citation provided." 
% program) f.write("\n\n") try: programs_used.append(plugin.citation['name']) except (AttributeError, KeyError): programs_used.append(program) f.close() log_stderr("\n") log_stderr("This run used %s." % (", ".join(programs_used)) ) log_stderr("References have been written to %s \n" "# - please cite as appropriate." % (params['citations']) ) return proteins
def process(params): """ Main program loop. Triggers the 'protocol' found in the params to annotate all proteins give the list of annotations needed by 'protocol'. Then outputs to screen and a .csv file. """ from helpers import dict_get, create_proteins_dict, log_stdout, log_stderr # will load all plugins in the plugins/ directory from inmembrane.plugins import * # initializations exec(import_protocol_python(params)) init_output_dir(params) seqids, proteins = create_proteins_dict(params['fasta']) # TODO: ideally this loop needs to be run within the protocol, # since for some protocols not all plugins # will be run for every sequence, conditional # on the outcome of a previous analysis # eg. protocol.run(params, proteins) # annotates with external binaries as found in plugins for plugin_str in protocol.get_annotations(params): plugin = eval(plugin_str) plugin.annotate(params, proteins) # do protocol analysis on the results of the annotations for seqid in seqids: protein = proteins[seqid] protocol.post_process_protein(params, protein) log_stdout(protocol.protein_output_line(seqid, proteins)) # print a summary table of classifications to stderr log_stderr(protocol.summary_table(params, proteins)) # always write to biologist-friendly csv file f = open(params['csv'], 'w') for seqid in seqids: f.write(protocol.protein_csv_line(seqid, proteins)) f.close() log_stderr("\n") log_stderr("Output written to %s" % (params['csv'])) # TODO: citations for specific HMMs (PFAM etc ?) # write citations to a file and gracefully deal with plugins # without a citation defined import codecs import textwrap f = codecs.open(params['citations'], mode='w', encoding='utf-8') programs_used = [] for program in protocol.get_annotations(params): plugin = eval(program) try: f.write(plugin.citation['name'] + ":\n") f.write(textwrap.fill(plugin.citation['ref'])) except AttributeError: f.write("%s - no citation provided." 
% program) f.write("\n\n") try: programs_used.append(plugin.citation['name']) except (AttributeError, KeyError): programs_used.append(program) f.close() log_stderr("\n") log_stderr("This run used %s." % (", ".join(programs_used))) log_stderr("References have been written to %s \n" "# - please cite as appropriate." % (params['citations'])) return proteins