Example #1
0
def process_ppi1(irefindex_file,
                 id_logfile,
                 output_logfile,
                 biochem_file,
                 binary_file,
                 complexes_file,
                 obo_file,
                 biogrid_ptm_codes_file,
                 filtered_pmids_file=None,
                 accepted_taxids=None):
    """Route every interaction from ``irefindex_file`` to one output file.

    Each interaction yielded by ``parse_mitab_file`` is handed to
    ``_process_interaction``; the integer it returns selects the
    destination stream:

    ====== =====================================
    result destination
    ====== =====================================
    0      ``NullFile()`` sink (removed)
    1      ``binary_file``
    2      ``complexes_file``
    3      ``biochem_file``
    ====== =====================================

    Args:
        irefindex_file: path of the input file to scan.
        id_logfile: path passed to ``read_id_mapping``.
        output_logfile: path of the log file; it is passed to
            ``_process_interaction`` and receives the final counts.
        biochem_file: output path for result code 3.
        binary_file: output path for result code 1.
        complexes_file: output path for result code 2.
        obo_file: path of the ontology file given to ``obo.OBOntology``.
        biogrid_ptm_codes_file: path passed to ``_BiochemFilter``.
        filtered_pmids_file: optional path passed to
            ``read_filtered_pmids``.
        accepted_taxids: passed unchanged to ``_process_interaction``.

    All files are closed even if processing raises.
    """
    counts = Phase1Counts()
    filtered_pmids = read_filtered_pmids(filtered_pmids_file)
    id_map = read_id_mapping(id_logfile)

    # NOTE(review): the 'U' flag in 'rU' is deprecated on Python 3 and was
    # removed in Python 3.11; it is kept here so newline handling stays
    # unchanged on the interpreter this code was written for.
    with open(obo_file, 'rU') as obo_fp:
        ontology = obo.OBOntology(obo_fp)
        biochem_filter = _BiochemFilter(ontology, biogrid_ptm_codes_file)
        complex_filter = _ComplexFilter(ontology)

        # NullFile is not known to be a context manager, so it is closed
        # explicitly in a finally clause instead of a `with` block.
        removed_fp = NullFile()
        try:
            with open(irefindex_file, 'rU') as input_fp, \
                    open(biochem_file, 'w') as biochem_fp, \
                    open(binary_file, 'w') as binary_fp, \
                    open(complexes_file, 'w') as complex_fp, \
                    open(output_logfile, 'w') as logfile_fp:

                # The position in this tuple is the result code returned
                # by _process_interaction; do not reorder.
                output_fps = (removed_fp, binary_fp, complex_fp, biochem_fp)
                for fp in output_fps:
                    Interaction.write_header(fp)

                scanner = parse_mitab_file(input_fp, full_mitab_iterator,
                                           None, iRefIndexInteraction)
                for interaction, lines in scanner:
                    line_numbers = lines[1]
                    res = _process_interaction(interaction, id_map,
                                               filtered_pmids, logfile_fp,
                                               counts, line_numbers,
                                               ontology, biochem_filter,
                                               complex_filter,
                                               accepted_taxids)
                    interaction.to_file(output_fps[res])

                counts.to_file(logfile_fp)
        finally:
            removed_fp.close()
Example #2
0
def process_ppi2(input_file,
                 output_file,
                 output_logfile,
                 skipped_pmids_file=None,
                 max_complex_size=120,
                 min_complex_size=3):
    """Process the interactions in ``input_file`` one PMID group at a time.

    For every ``(pmid, pairs, complexes)`` group yielded by
    ``_parse_by_pmid`` the pairs are passed to a ``ComplexDeflator``,
    unless the PMID is in the skipped set or the group has fewer than
    ``min_complex_size - 1`` pairs; in those cases all pairs are kept as
    they are. The unused pairs, the existing complexes and any new
    complexes are then written, in that order, to ``output_file``.

    Args:
        input_file: path of the file to read.
        output_file: path of the file to write.
        output_logfile: path of the log file; it is handed to the
            ``ComplexDeflator`` and receives the final counts.
        skipped_pmids_file: optional path passed to
            ``read_filtered_pmids``; the PMIDs it returns are never
            deflated.
        max_complex_size: passed to ``ComplexDeflator``.
        min_complex_size: passed to ``ComplexDeflator``; also sets the
            minimum number of pairs a group needs before it is deflated.

    All files are closed even if processing raises.
    """
    counts = Phase2Counts()

    with open(output_logfile, 'w') as logfile_fp:
        skipped_pmids = read_filtered_pmids(skipped_pmids_file)
        skipped_pmids.add(0)  # this is invalid pmid - not really a paper

        # NOTE(review): the 'U' flag in 'rU' is deprecated on Python 3 and
        # was removed in Python 3.11; it is kept here so newline handling
        # stays unchanged on the interpreter this code was written for.
        with open(input_file, 'rU') as input_fp, \
                open(output_file, 'w') as output_fp:
            Interaction.write_header(output_fp)

            deflator = ComplexDeflator(logfile_fp, max_complex_size,
                                       min_complex_size)

            for pmid, pairs, complexes in _parse_by_pmid(input_fp):
                counts.initial_pairs += len(pairs)
                counts.C += len(complexes)
                counts.pmids += 1

                if (pmid in skipped_pmids
                        or len(pairs) < (min_complex_size - 1)):
                    # Skipped or too small: keep every pair untouched.
                    new_complexes = []
                    unused_pairs = pairs
                else:
                    new_complexes, unused_pairs = deflator(pmid, pairs,
                                                           complexes)

                _write_unused_pairs(output_fp, unused_pairs)
                _write_existing_complexes(output_fp, complexes)
                _write_new_complexes(output_fp, new_complexes)

                counts.unused_pairs += len(unused_pairs)
                # Each new complex increments the counter attribute named
                # after its edgetype code.
                for intr in new_complexes:
                    code = intr.edgetype
                    setattr(counts, code, getattr(counts, code) + 1)

            counts.to_file(logfile_fp)