def main(argv=None):
    """Command-line entry point for the simulated-annealing run.

    Parses arguments, builds the annotated-interactions structures,
    configures the state classes from the CLI flags, runs the annealer,
    and logs progress throughout.

    :Parameters:
    - `argv`: optional argument list passed through to the CLI parser
      (defaults to `None`, letting the parser use `sys.argv`)

    """
    cli_parser = bpn.cli.SaCli()
    input_data = cli_parser.parse_args(argv)
    logger.info("Constructing supporting data structures; this may "
            "take a while...")
    annotated_interactions = bpn.structures.AnnotatedInteractionsArray(
            input_data.interactions_graph,
            input_data.annotations_dict
    )
    logger.info("Considering %d candidate links in total." %
            annotated_interactions.calc_num_links())
    logger.info("Constructing Simulated Annealing")
    # Select the parameters-state class from the CLI flags.
    if input_data.free_parameters:
        logger.info("Using free parameter transitions.")
        parameters_state_class = states.RandomTransitionParametersState
    else:
        parameters_state_class = states.PLNParametersState
    # Select the links-state class (swap transitions on or off).
    if input_data.disable_swaps:
        logger.info("Disabling swap transitions.")
        links_state_class = states.NoSwapArrayLinksState
    else:
        links_state_class = states.ArrayLinksState
    # Build the CSV writer for per-transition output; the detailed
    # variant records additional fields per state.
    if input_data.detailed_transitions:
        logger.info("Recording extra information for each state.")
        transitions_csvfile = convutils.make_csv_dict_writer(
                input_data.transitions_outfile,
                DETAILED_TRANSITIONS_FIELDNAMES
        )
    else:
        transitions_csvfile = convutils.make_csv_dict_writer(
                input_data.transitions_outfile,
                TRANSITIONS_FIELDNAMES
        )
    # NOTE(review): `transitions_csvfile` is never passed to the
    # annealer below, and the annealer receives no writer for
    # transitions output — confirm whether a constructor argument went
    # missing here.
    sa = simulatedannealing.ArraySimulatedAnnealing(
            annotated_interactions,
            input_data.activity_threshold,
            input_data.transition_ratio,
            num_steps=input_data.steps,
            temperature=input_data.temperature,
            end_temperature=input_data.end_temperature,
            parameters_state_class=parameters_state_class,
            links_state_class=links_state_class
    )
    logger.info("Beginning to Anneal. This may take a while...")
    sa.run()
    logger.info("Run completed.")
    logger.info("Writing link results to %s" % input_data.links_outfile.name)
    # NOTE(review): the two writers below are constructed but nothing
    # visible in this function writes through them — verify whether the
    # results are meant to be written here (cf. the MCMC `main`, which
    # calls explicit write_* methods after its run).
    links_out_csvwriter = convutils.make_csv_dict_writer(
            input_data.links_outfile, LINKS_FIELDNAMES)
    logger.info("Writing parameter results to %s" % (
            input_data.parameters_outfile.name))
    parameters_out_csvwriter = convutils.make_csv_dict_writer(
            input_data.parameters_outfile, PARAMETERS_FIELDNAMES)
    logger.info("Writing transitions data to %s."
            % (input_data.transitions_outfile.name))
    logger.info("Finished.")
def calculate_and_output_results_edge_swap(
        outfileh,
        pairs,
        total_pairs,
        interactions_graph,
        annotations_dict,
        num_permutations,
        num_edge_swap_events,
        use_estimation=True,
        score_correction=False
    ):
    """Calculates the significance of a link between each given pair of
    annotation terms using random graphs produced by edge swapping.

    :Parameters:
    - `outfileh`: a file handle to a file for output
    - `pairs`: an iterable of pairs of annotation terms
    - `total_pairs`: the number of total annotation pairs to be
      processed [NOTE: currently unused by this function; retained for
      signature compatibility with the resampling variant]
    - `interactions_graph`: graph containing the gene-gene or gene
      product-gene product interactions
    - `annotations_dict`: a dictionary with annotation terms as keys and
      `set`s of genes as values
    - `num_permutations`: maximum number of permutations to perform
      [NOTE: see `use_estimation`]
    - `num_edge_swap_events`: the number of edge swap events desired to
      produce each random graph. [NOTE: this number is multiplied by the
      number of edges in the `interactions_graph` to get the total
      number of edge swap events.]
    - `use_estimation`: estimate significances for pairs which are
      unlikely to have significant scores [default: `True`] [NOTE: using
      this option will not guarantee that the number of permutations
      specified by `num_permutations` will be performed.]
    - `score_correction`: if `True`, perform correction on scores using
      an expected value computed from the mean expression value
      [default: `False`]

    """
    # Create the output CSV file.
    csv_writer = convutils.make_csv_dict_writer(outfileh, OUTFILE_FIELDS)
    pair_statistics = compute_significance_for_pairs_edge_swap(
            pairs,
            interactions_graph,
            annotations_dict,
            num_permutations,
            num_edge_swap_events,
            use_estimation,
            score_correction
    )
    logger.info("Writing results to %s" % outfileh.name)
    # Use .items() rather than the Python-2-only .iteritems(): it is
    # equivalent on Python 2 (merely materializing the pairs once) and
    # keeps this function working on Python 3.
    write_results_to_csv(csv_writer, pair_statistics.items())
def calculate_and_output_scores(
        interactions_graph,
        annotations_dict,
        links,
        num_links,
        links_outfile
    ):
    """Calculate and output the link scores.

    :Parameters:
    - `interactions_graph`: graph containing the gene-gene or gene
      product-gene product interactions
    - `annotations_dict`: a dictionary with annotation terms as keys and
      `set`s of genes as values
    - `links`: pairs of annotation terms of which to calculate link
      scores
    - `num_links`: the number of links contained in `links`
    - `links_outfile`: file for output of link results

    """
    csv_writer = convutils.make_csv_dict_writer(
            links_outfile, OUTFILE_FIELDS)
    overlap_scores = []
    # Track the number processed explicitly; the original relied on the
    # loop variable `i` after the loop, which raised an
    # UnboundLocalError whenever `links` was empty.
    num_processed = 0
    for i, link_scores in enumerate(
            calculate_linkage_scores(
                interactions_graph,
                annotations_dict,
                links
            )
    ):
        num_processed = i + 1
        overlap_scores.append(link_scores)
        # periodically flush results to disk
        if not (num_processed % RESULTS_BUFFER_SIZE):
            percent_done = int(
                    math.floor(100 * num_processed / float(num_links)))
            logger.info("%d of %d (%d%%) links processed. "
                    "Writing to %s." % (num_processed, num_links,
                        percent_done, links_outfile.name)
            )
            csv_writer.writerows(overlap_scores)
            # flush the scores
            overlap_scores = []
    logger.info("%d of %d (100%%) links processed." % (
            num_processed, num_links))
    logger.info("Writing to %s" % links_outfile.name)
    # Write any scores still buffered since the last periodic flush.
    csv_writer.writerows(overlap_scores)
def main(argv=None):
    """Command-line entry point for the MCMC (Markov chain) run.

    Parses arguments, builds the annotated-interactions structures,
    seeds the RNG, selects the recorder/chain/state classes implied by
    the CLI flags (terms-based, genes-based, independent-terms, or the
    plain array model), runs the chain, writes all result files, and
    logs the total running time.

    :Parameters:
    - `argv`: optional argument list passed through to the CLI parser
      (defaults to `None`, letting the parser use `sys.argv`)

    """
    starting_time = datetime.datetime.now()
    cli_parser = bpn.cli.McmcCli()
    input_data = cli_parser.parse_args(argv)
    logger.info("Constructing supporting data structures; this may "
            "take a while...")
    # Terms-based runs use the 2D-array structure; otherwise the flat
    # array structure is used.
    if input_data.terms_based:
        annotated_interactions = (
                bpn.structures.AnnotatedInteractions2dArray(
                    input_data.interactions_graph,
                    input_data.annotations_dict,
                    stringent_coannotations=input_data.stringent_coannotations
                )
        )
    else:
        annotated_interactions = (
                bpn.structures.AnnotatedInteractionsArray(
                    input_data.interactions_graph,
                    input_data.annotations_dict,
                    stringent_coannotations=input_data.stringent_coannotations
                )
        )
    # Check to see whether the potential links form a single connected
    # component.
    check_link_components(annotated_interactions)
    # TODO: check a command line option to see if the user input a seed;
    # for now, we'll just generate one all the time and report it.
    random_seed = create_seed_value()
    logger.info("The random seed value for this run is {0}.".format(
            random_seed))
    random.seed(random_seed)
    logger.info("Constructing the Markov chain.")
    # Prepare the CSV writers for the state recorder.
    links_out_csvwriter = convutils.make_csv_dict_writer(
            input_data.links_outfile, LINKS_FIELDNAMES)
    parameters_out_csvwriter = convutils.make_csv_dict_writer(
            input_data.parameters_outfile, PARAMETERS_FIELDNAMES)
    if input_data.terms_based:
        terms_out_csvwriter = convutils.make_csv_dict_writer(
                input_data.terms_outfile, TERMS_FIELDNAMES)
    # Present the seed_links as indices.
    if input_data.seed_links:
        seed_links = [annotated_interactions.get_link_index(*link)
                for link in input_data.seed_links]
    else:
        seed_links = None
    # Choose the appropriate parameters class.
    if input_data.fixed_distributions:
        logger.info("Using fixed distributions for all parameters.")
        if input_data.terms_based:
            parameters_state_class = states.FixedTermPriorParametersState
        else:
            parameters_state_class = (
                    states.FixedDistributionParametersState)
    elif input_data.terms_based:
        parameters_state_class = states.TermPriorParametersState
    elif input_data.free_parameters:
        logger.info("Using free parameter transitions.")
        parameters_state_class = states.RandomTransitionParametersState
    else:
        parameters_state_class = states.PLNParametersState
    if input_data.terms_based:
        logger.info("Using terms-based model.")
        # seed_terms is only needed (and only defined) for the
        # genes-based and independent-terms variants below.
        if input_data.independent_terms or input_data.genes_based:
            if input_data.seed_terms:
                seed_terms = [
                        annotated_interactions.get_term_index(term)
                        for term in input_data.seed_terms
                ]
            else:
                seed_terms = None
        #else:
            #seed_terms = None
        if input_data.genes_based:
            # Genes-based model: term overlap is assessed through genes.
            if input_data.detailed_transitions:
                transitions_out_csvwriter = convutils.make_csv_dict_writer(
                        input_data.transitions_outfile,
                        GENES_BASED_TRANSITIONS_FIELDNAMES
                )
                state_recorder = (
                        recorders.DetailedGenesBasedStateRecorder(
                            annotated_interactions,
                            parameters_out_csvwriter,
                            links_out_csvwriter,
                            terms_out_csvwriter,
                            transitions_out_csvwriter
                        )
                )
            else:
                transitions_out_csvwriter = convutils.make_csv_dict_writer(
                        input_data.transitions_outfile,
                        TRANSITIONS_FIELDNAMES
                )
                # NOTE(review): this genes-based branch instantiates
                # TermsBasedStateRecorder while the detailed branch
                # above uses a genes-based recorder — confirm this
                # asymmetry is intentional.
                state_recorder = recorders.TermsBasedStateRecorder(
                        annotated_interactions,
                        parameters_out_csvwriter,
                        links_out_csvwriter,
                        terms_out_csvwriter,
                        transitions_out_csvwriter
                )
            logger.info("Assessing term overlap through genes.")
            markov_chain = chains.GenesBasedMarkovChain(
                    state_recorder,
                    input_data.burn_in,
                    input_data.steps,
                    annotated_interactions,
                    input_data.activity_threshold,
                    transition_type_ratio=input_data.transition_ratio,
                    seed_terms_indices=seed_terms,
                    seed_links_indices=seed_links,
                    link_false_pos=input_data.link_false_pos,
                    link_false_neg=input_data.link_false_neg,
                    link_prior=input_data.link_prior,
                    term_false_pos=input_data.term_false_pos,
                    term_false_neg=input_data.term_false_neg,
                    term_prior=input_data.term_prior,
            )
        else:
            if input_data.independent_terms:
                # Independent-terms model.
                if input_data.detailed_transitions:
                    transitions_out_csvwriter = (
                            convutils.make_csv_dict_writer(
                                input_data.transitions_outfile,
                                INDEPENDENT_TERMS_BASED_TRANSITIONS_FIELDNAMES
                            )
                    )
                    state_recorder = (
                            recorders.DetailedIndependentTermsBasedStateRecorder(
                                annotated_interactions,
                                parameters_out_csvwriter,
                                links_out_csvwriter,
                                terms_out_csvwriter,
                                transitions_out_csvwriter
                            )
                    )
                else:
                    transitions_out_csvwriter = (
                            convutils.make_csv_dict_writer(
                                input_data.transitions_outfile,
                                TRANSITIONS_FIELDNAMES
                            )
                    )
                    state_recorder = (
                            recorders.TermsBasedStateRecorder(
                                annotated_interactions,
                                parameters_out_csvwriter,
                                links_out_csvwriter,
                                terms_out_csvwriter,
                                transitions_out_csvwriter
                            )
                    )
                logger.info("Using independent-terms model.")
                markov_chain = chains.IndependentTermsBasedMarkovChain(
                        state_recorder,
                        input_data.burn_in,
                        input_data.steps,
                        annotated_interactions,
                        input_data.activity_threshold,
                        transition_type_ratio=input_data.transition_ratio,
                        seed_terms_indices=seed_terms,
                        seed_links_indices=seed_links,
                        link_false_pos=input_data.link_false_pos,
                        link_false_neg=input_data.link_false_neg,
                        link_prior=input_data.link_prior,
                        term_prior=input_data.term_prior,
                        parameters_state_class=parameters_state_class
                )
            else:
                # Plain terms-based model.
                if input_data.detailed_transitions:
                    transitions_out_csvwriter = (
                            convutils.make_csv_dict_writer(
                                input_data.transitions_outfile,
                                TERMS_BASED_TRANSITIONS_FIELDNAMES
                            )
                    )
                    state_recorder = (
                            recorders.DetailedTermsBasedStateRecorder(
                                annotated_interactions,
                                parameters_out_csvwriter,
                                links_out_csvwriter,
                                terms_out_csvwriter,
                                transitions_out_csvwriter
                            )
                    )
                else:
                    transitions_out_csvwriter = (
                            convutils.make_csv_dict_writer(
                                input_data.transitions_outfile,
                                TRANSITIONS_FIELDNAMES
                            )
                    )
                    state_recorder = (
                            recorders.TermsBasedStateRecorder(
                                annotated_interactions,
                                parameters_out_csvwriter,
                                links_out_csvwriter,
                                terms_out_csvwriter,
                                transitions_out_csvwriter
                            )
                    )
                if input_data.intraterms:
                    logger.info("Considering intra-term interactions.")
                    links_state_class = states.IntraTermsAndLinksState
                else:
                    links_state_class = states.TermsAndLinksState
                markov_chain = chains.TermsBasedMarkovChain(
                        state_recorder,
                        input_data.burn_in,
                        input_data.steps,
                        annotated_interactions,
                        input_data.activity_threshold,
                        transition_type_ratio=input_data.transition_ratio,
                        seed_links_indices=seed_links,
                        link_false_pos=input_data.link_false_pos,
                        link_false_neg=input_data.link_false_neg,
                        link_prior=input_data.link_prior,
                        term_prior=input_data.term_prior,
                        parameters_state_class=parameters_state_class,
                        links_state_class=links_state_class,
                )
    else:
        # Plain array (links-only) model.
        if input_data.disable_swaps:
            logger.info("Disabling swap transitions.")
            links_state_class = states.NoSwapArrayLinksState
        else:
            links_state_class = states.ArrayLinksState
        if input_data.detailed_transitions:
            logger.info("Recording extra information for each state.")
            transitions_out_csvwriter = convutils.make_csv_dict_writer(
                    input_data.transitions_outfile,
                    DETAILED_TRANSITIONS_FIELDNAMES
            )
            if input_data.record_frequencies:
                logger.info("Recording frequency information for each "
                        "state.")
                state_recorder = recorders.FrequencyDetailedArrayStateRecorder(
                        annotated_interactions,
                        parameters_out_csvwriter,
                        links_out_csvwriter,
                        transitions_out_csvwriter
                )
            else:
                state_recorder = recorders.DetailedArrayStateRecorder(
                        annotated_interactions,
                        parameters_out_csvwriter,
                        links_out_csvwriter,
                        transitions_out_csvwriter
                )
        else:
            transitions_out_csvwriter = convutils.make_csv_dict_writer(
                    input_data.transitions_outfile,
                    TRANSITIONS_FIELDNAMES,
                    # TODO: This is a hack to force
                    # FrequencyDetailedArrayStateRecorder to work
                    # without the details transitions flag
                    extrasaction="ignore"
            )
            if input_data.record_frequencies:
                logger.info("Recording frequency information for each "
                        "state.")
                state_recorder = recorders.FrequencyDetailedArrayStateRecorder(
                        annotated_interactions,
                        parameters_out_csvwriter,
                        links_out_csvwriter,
                        transitions_out_csvwriter
                )
            else:
                state_recorder = recorders.ArrayStateRecorder(
                        annotated_interactions,
                        parameters_out_csvwriter,
                        links_out_csvwriter,
                        transitions_out_csvwriter
                )
        markov_chain = chains.ArrayMarkovChain(
                state_recorder,
                input_data.burn_in,
                input_data.steps,
                annotated_interactions,
                input_data.activity_threshold,
                transition_type_ratio=input_data.transition_ratio,
                seed_links_indices=seed_links,
                link_false_pos=input_data.link_false_pos,
                link_false_neg=input_data.link_false_neg,
                link_prior=input_data.link_prior,
                parameters_state_class=parameters_state_class,
                links_state_class=links_state_class,
        )
    logger.debug("""\
Chain information:
    Chain class: {chain.__class__}
    Overall class: {chain.current_state.__class__}
    Links class: {chain.current_state.links_state.__class__}
    Parameters class: {chain.current_state.parameters_state.__class__}\
""".format(chain=markov_chain))
    logger.info("Beginning to run through states in the chain. This "
            "may take a while...")
    markov_chain.run()
    logger.info("Run completed.")
    # Write out all result files via the recorder.
    logger.info("Writing link results to {0}".format(
            input_data.links_outfile.name))
    markov_chain.state_recorder.write_links_probabilities()
    logger.info("Writing parameter results to {0}".format(
            input_data.parameters_outfile.name))
    markov_chain.state_recorder.write_parameters_probabilities()
    if input_data.terms_based:
        logger.info("Writing terms data to {0}.".format(
                input_data.terms_outfile.name))
        markov_chain.state_recorder.write_terms_probabilities()
    markov_chain.state_recorder.write_transition_states()
    logger.info("Transitions data written to {0}.".format(
            input_data.transitions_outfile.name))
    if input_data.record_frequencies:
        logger.info("Writing state frequencies to {0}".format(
                input_data.frequencies_outfile.name))
        # Only the array-chain recorders support frequency output.
        if "ArrayMarkovChain" in markov_chain.__class__.__name__:
            markov_chain.state_recorder.write_state_frequencies(
                    input_data.frequencies_outfile,
                    input_data.activity_threshold,
                    input_data.transition_ratio,
                    input_data.link_false_pos,
                    input_data.link_false_neg,
                    input_data.link_prior,
                    parameters_state_class,
                    links_state_class
            )
            logger.info("State frequencies written.")
    ending_time = datetime.datetime.now()
    logger.info("Finished.")
    # Report total wall-clock running time as h/m/s, folding days into
    # hours (timedelta.seconds alone wraps at 24 hours).
    running_time = ending_time - starting_time
    hours, remainder = divmod(running_time.seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    hours += running_time.days * 24
    logger.info("Running time: {0}h {1}m {2}s".format(hours, minutes,
            seconds))
def calculate_and_output_results_resampling(
        outfileh,
        pairs,
        total_pairs,
        interactions_graph,
        annotations_dict,
        num_permutations,
        use_estimation=True,
        score_correction=False
    ):
    """Compute and write the significance of each annotation-term pair,
    using resampling of genes annotated by the second term.

    Results are buffered and flushed to the output file every
    `RESULTS_BUFFER_SIZE` pairs, with progress logged at each flush.

    :Parameters:
    - `outfileh`: a file handle to a file for output
    - `pairs`: an iterable of pairs of annotation terms
    - `total_pairs`: the number of total annotation pairs to be
      processed
    - `interactions_graph`: graph containing the gene-gene or gene
      product-gene product interactions
    - `annotations_dict`: a dictionary with annotation terms as keys and
      `set`s of genes as values
    - `num_permutations`: maximum number of permutations to perform
      [NOTE: see `use_estimation`]
    - `use_estimation`: estimate significances for pairs which are
      unlikely to have significant scores [default: `True`] [NOTE: using
      this option will not guarantee that the number of permutations
      specified by `num_permutations` will be performed.]
    - `score_correction`: if `True`, perform correction on scores using
      an expected value computed from the mean expression value
      [default: `False`]

    """
    # Output goes through a dictionary-based CSV writer.
    csv_writer = convutils.make_csv_dict_writer(outfileh, OUTFILE_FIELDS)
    # Lazily computes significance statistics, one pair at a time.
    significance_results = compute_significance_for_pairs(
            pairs,
            interactions_graph,
            annotations_dict,
            num_permutations,
            use_estimation,
            score_correction
    )
    buffered_results = []
    for index, pair_result in enumerate(significance_results):
        buffered_results.append(pair_result)
        completed = index + 1
        # Every RESULTS_BUFFER_SIZE pairs, log progress and flush the
        # buffered rows to disk.
        if completed % RESULTS_BUFFER_SIZE == 0:
            fraction_done = int(
                    math.floor(100 * completed / float(total_pairs)))
            logger.info("%d of %d (%d%%) pairs processed. "
                    "Writing to %s."
                    % (completed, total_pairs, fraction_done,
                        outfileh.name)
            )
            write_results_to_csv(csv_writer, buffered_results)
            buffered_results = []
            outfileh.flush()
    logger.info("All %d pairs processed." % total_pairs)
    # Write whatever remains since the last periodic flush.
    if buffered_results:
        logger.info("Writing to %s" % outfileh.name)
        write_results_to_csv(csv_writer, buffered_results)