Ejemplo n.º 1
0
def load_mutation_heat(args):
    """Build mutation heat from the files referenced by ``args``.

    Reads ``args.sample_file``, ``args.gene_file``, ``args.snv_file``,
    ``args.cna_file``, ``args.cna_filter_threshold`` and ``args.min_freq``.
    The sample and gene files are optional. When no samples were loaded,
    the sample collection is rebuilt from the samples seen on the loaded
    SNVs and CNAs.

    Returns a 2-tuple: the result of ``hnheat.mut_heat`` and ``None``.
    """
    if args.sample_file:
        known_samples = hnio.load_samples(args.sample_file)
    else:
        known_samples = None

    if args.gene_file:
        known_genes = hnio.load_genes(args.gene_file)
    else:
        known_genes = None

    snv_records = hnio.load_snvs(args.snv_file, known_genes, known_samples)

    # CNAs are optional; an empty list stands in when no CNA file is given.
    cna_records = []
    if args.cna_file:
        cna_records = hnio.load_cnas(args.cna_file, known_genes, known_samples)
    if args.cna_filter_threshold:
        cna_records = hnheat.filter_cnas(cna_records, args.cna_filter_threshold)

    # Fall back to the samples that actually carry a mutation.
    if not known_samples:
        known_samples = set(snv.sample for snv in snv_records)
        known_samples.update(cna.sample for cna in cna_records)

    mutation_heat = hnheat.mut_heat(len(known_samples), snv_records,
                                    cna_records, args.min_freq)
    return mutation_heat, None
Ejemplo n.º 2
0
def load_mutation_heat(args):
    """Compute mutation heat for the inputs named on ``args``.

    Reads ``args.gene_file``, ``args.sample_file``, ``args.snv_file``,
    ``args.cna_file``, ``args.cna_filter_threshold`` and ``args.min_freq``.
    The gene and sample files are optional. Whichever of the two was not
    loaded is rebuilt from the genes / samples seen on the loaded SNVs and
    CNAs.

    Returns whatever ``hnheat.mut_heat`` returns for the genes, the number
    of samples, the SNVs, the CNAs and the minimum frequency.
    """
    gene_pool = None
    if args.gene_file:
        gene_pool = hnio.load_genes(args.gene_file)

    sample_pool = None
    if args.sample_file:
        sample_pool = hnio.load_samples(args.sample_file)

    snv_records = hnio.load_snvs(args.snv_file, gene_pool, sample_pool)

    # CNAs are optional; an empty list stands in when no CNA file is given.
    if args.cna_file:
        cna_records = hnio.load_cnas(args.cna_file, gene_pool, sample_pool)
    else:
        cna_records = []
    if args.cna_filter_threshold:
        cna_records = hnheat.filter_cnas(cna_records, args.cna_filter_threshold)

    # Fall back to what the mutation records themselves mention.
    if not sample_pool:
        sample_pool = set(snv.sample for snv in snv_records)
        sample_pool.update(cna.sample for cna in cna_records)
    if not gene_pool:
        gene_pool = set(snv.gene for snv in snv_records)
        gene_pool.update(cna.gene for cna in cna_records)

    sample_count = len(sample_pool)
    return hnheat.mut_heat(gene_pool, sample_count, snv_records, cna_records,
                           args.min_freq)
Ejemplo n.º 3
0
def run(args):
    """Collect HotNet2 results into ``subnetworks.json`` for the web viewer.

    For every file in ``args.results_files`` the components, the run
    statistics and (when the heat file was produced by
    ``load_mutation_heat``) the oncoprint data are stored under the run's
    delta. The combined data is written to ``subnetworks.json`` in
    ``args.output_directory`` and the ``VIZ_SUBNETWORKS`` template from the
    ``viz_files`` directory next to the ``hotnet2`` module is copied there
    as ``VIZ_INDEX``.

    Reads from ``args``: ``output_directory``, ``results_files``,
    ``display_score_file``, ``display_name_file``, ``edge_file`` and
    ``network_name``.

    Raises ``ValueError`` if no statistics keys were found in any results
    file (``min`` of an empty set), as the original did.
    """
    subnetworks_file = os.path.join(os.path.dirname(str(hotnet2.__file__)),
                                    'viz_files', VIZ_SUBNETWORKS)

    # create output directory if doesn't exist; warn if it exists and is not empty
    outdir = args.output_directory
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    if os.listdir(outdir):
        print("WARNING: Output directory is not empty. Any conflicting files will be overwritten. "
              "(Ctrl-c to cancel).")

    # The display overrides do not depend on the results file: load them once.
    d_score = hnio.load_display_score_tsv(args.display_score_file) if args.display_score_file else None
    d_name = hnio.load_display_name_tsv(args.display_name_file) if args.display_name_file else dict()

    ks = set()
    output = dict(deltas=[], subnetworks=dict(), mutation_matrices=dict(), stats=dict())
    for results_file in args.results_files:
        # Context managers close the JSON files instead of leaking the handles.
        with open(results_file) as results_in:
            results = json.load(results_in)
        ccs = results['components']
        parameters = results['parameters']

        with open(parameters['heat_file']) as heat_in:
            heat_file = json.load(heat_in)
        gene2heat = heat_file['heat']
        heat_parameters = heat_file['parameters']
        edges = hnio.load_ppi_edges(args.edge_file, hnio.load_index(parameters['infmat_index_file']))
        delta = format(parameters['delta'], 'g')
        output['deltas'].append(delta)

        output['subnetworks'][delta] = [
            viz.get_component_json(cc, gene2heat, edges, args.network_name, d_score, d_name)
            for cc in ccs
        ]

        # make oncoprints if heat file was generated from mutation data
        if heat_parameters.get('heat_fn') == 'load_mutation_heat':
            samples = hnio.load_samples(heat_parameters['sample_file']) if heat_parameters['sample_file'] else None
            genes = hnio.load_genes(heat_parameters['gene_file']) if heat_parameters['gene_file'] else None
            snvs = hnio.load_snvs(heat_parameters['snv_file'], genes, samples) if heat_parameters['snv_file'] else []
            cnas = hnio.load_cnas(heat_parameters['cna_file'], genes, samples) if heat_parameters['cna_file'] else []

            output['mutation_matrices'][delta] = [
                viz.get_oncoprint_json(cc, snvs, cnas, d_name) for cc in ccs
            ]

            sample_type_file = heat_parameters.get('sample_type_file')
            if sample_type_file:
                with open(sample_type_file) as f:
                    # Skip comment lines and blank lines (a blank line would
                    # otherwise break the two-column dict construction).
                    sample_to_types = dict(l.rstrip().split() for l in f
                                           if l.strip() and not l.startswith("#"))
                type_to_samples = dict((t, []) for t in set(sample_to_types.values()))
                # .items() works on both Python 2 and Python 3.
                for s, ty in sample_to_types.items():
                    type_to_samples[ty].append(s)
                output['sampleToTypes'] = sample_to_types
                output['typeToSamples'] = type_to_samples
            else:
                # Without a sample file ``samples`` is None; take the samples
                # from the mutations so the default typing below cannot fail.
                if not samples:
                    samples = set(m.sample for m in snvs) | set(m.sample for m in cnas)
                output['sampleToTypes'] = dict((s, "Cancer") for s in samples)
                output['typeToSamples'] = dict(Cancer=list(samples))

        output['stats'][delta] = results['statistics']
        # Only the set of k values is needed; the per-k reformatting that used
        # to follow was unreachable and has been removed.
        ks.update(int(k) for k in results['statistics'])

    # list(...) keeps the value JSON-serializable on Python 3 as well.
    output['ks'] = list(range(min(ks), max(ks) + 1))
    with open(os.path.join(outdir, 'subnetworks.json'), 'w') as out:
        json.dump(output, out, indent=4)

    shutil.copy(subnetworks_file, os.path.join(outdir, VIZ_INDEX))
Ejemplo n.º 4
0
def run(args):
    """Collect HotNet2 results into ``subnetworks.json`` for the web viewer.

    For every file in ``args.results_files`` the components, the run
    statistics, the heat scores and (when the heat file was produced by
    ``load_mutation_heat``) the oncoprint data are gathered. The combined
    data, including the sorted list of every gene found in any component,
    is written to ``subnetworks.json`` in ``args.output_directory`` and the
    ``VIZ_SUBNETWORKS`` template from the ``viz_files`` directory next to
    the ``hotnet2`` module is copied there as ``VIZ_INDEX``.

    Reads from ``args``: ``output_directory``, ``results_files``,
    ``display_score_file``, ``display_name_file``, ``edge_file`` and
    ``network_name``.

    A warning is written to stderr when the results files reference heat
    files with differing contents; only the last one is kept in the output.

    Raises ``ValueError`` if no statistics keys were found in any results
    file (``min`` of an empty set), as the original did.
    """
    subnetworks_file = os.path.join(os.path.dirname(str(hotnet2.__file__)),
                                    'viz_files', VIZ_SUBNETWORKS)

    # create output directory if doesn't exist; warn if it exists and is not empty
    outdir = args.output_directory
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    if os.listdir(outdir):
        print("WARNING: Output directory is not empty. Any conflicting files will be overwritten. "
              "(Ctrl-c to cancel).")

    # The display overrides do not depend on the results file: load them once.
    d_score = hnio.load_display_score_tsv(args.display_score_file) if args.display_score_file else None
    d_name = hnio.load_display_name_tsv(args.display_name_file) if args.display_name_file else dict()

    ks = set()
    output = dict(deltas=[], subnetworks=dict(), mutation_matrices=dict(), stats=dict())
    predictions = set()
    multipleHeatFiles = False
    for results_file in args.results_files:
        with open(results_file, 'r') as results_in:
            results = json.load(results_in)
        ccs = results['components']
        parameters = results['parameters']

        # Close the heat file too instead of leaking the handle.
        with open(parameters['heat_file']) as heat_in:
            heat_file = json.load(heat_in)
        gene2heat = heat_file['heat']
        heat_parameters = heat_file['parameters']
        edges = hnio.load_ppi_edges(args.edge_file, hnio.load_index(parameters['infmat_index_file']))
        delta = format(parameters['delta'], 'g')
        output['deltas'].append(delta)

        predictions.update(g for cc in ccs for g in cc)
        output['subnetworks'][delta] = [
            viz.get_component_json(cc, gene2heat, edges, args.network_name, d_score, d_name)
            for cc in ccs
        ]

        # Record the heat scores. Comparing the dicts as a whole also handles
        # heat files whose gene sets differ; looking each gene up in the
        # previous dict raised KeyError in that case.
        if 'geneToHeat' in output and output['geneToHeat'] != gene2heat:
            multipleHeatFiles = True
        output['geneToHeat'] = gene2heat

        # make oncoprints if heat file was generated from mutation data
        if heat_parameters.get('heat_fn') == 'load_mutation_heat':
            samples = hnio.load_samples(heat_parameters['sample_file']) if heat_parameters['sample_file'] else None
            genes = hnio.load_genes(heat_parameters['gene_file']) if heat_parameters['gene_file'] else None
            snvs = hnio.load_snvs(heat_parameters['snv_file'], genes, samples) if heat_parameters['snv_file'] else []
            cnas = hnio.load_cnas(heat_parameters['cna_file'], genes, samples) if heat_parameters['cna_file'] else []

            # Get the samples from the mutations directly if they weren't provided
            if not samples:
                samples = set(m.sample for m in snvs) | set(m.sample for m in cnas)

            output['mutation_matrices'][delta] = [
                viz.get_oncoprint_json(cc, snvs, cnas, d_name) for cc in ccs
            ]

            sample_type_file = heat_parameters.get('sample_type_file')
            if sample_type_file:
                with open(sample_type_file) as f:
                    sample_to_types = dict(l.rstrip().split() for l in f if not l.startswith("#"))
                type_to_samples = dict((t, []) for t in set(sample_to_types.values()))
                # .items() works on both Python 2 and Python 3.
                for s, ty in sample_to_types.items():
                    type_to_samples[ty].append(s)
                output['sampleToTypes'] = sample_to_types
                output['typeToSamples'] = type_to_samples
            else:
                output['sampleToTypes'] = dict((s, "Cancer") for s in samples)
                output['typeToSamples'] = dict(Cancer=list(samples))

        output['stats'][delta] = results['statistics']
        ks.update(int(k) for k in results['statistics'])

    # Print a warning if there were multiple heat files referenced by
    # the results files
    if multipleHeatFiles:
        sys.stderr.write('Warning: results files used multiple heat files. Only the last heat file will be used to tabulate scores.\n')

    # Output to file
    output['predictions'] = sorted(predictions)  # list of nodes found in any run
    # list(...) keeps the value JSON-serializable on Python 3 as well.
    output['ks'] = list(range(min(ks), max(ks) + 1))
    with open(os.path.join(outdir, 'subnetworks.json'), 'w') as out:
        json.dump(output, out, indent=4)

    shutil.copy(subnetworks_file, os.path.join(outdir, VIZ_INDEX))
Ejemplo n.º 5
0
def run(args):
    """Collect HotNet2 results into ``subnetworks.json`` for the web viewer.

    For every file in ``args.results_files`` the components, the run
    statistics and (when the heat file was produced by
    ``load_mutation_heat``) the oncoprint data are stored under the run's
    delta. The combined data is written to ``subnetworks.json`` in
    ``args.output_directory`` and the ``VIZ_SUBNETWORKS`` template from the
    ``viz_files`` directory next to the ``hotnet2`` module is copied there
    as ``VIZ_INDEX``.

    Reads from ``args``: ``output_directory``, ``results_files``,
    ``display_score_file``, ``display_name_file``, ``edge_file`` and
    ``network_name``.

    Raises ``ValueError`` if no statistics keys were found in any results
    file (``min`` of an empty set), as the original did.
    """
    subnetworks_file = os.path.join(
        os.path.dirname(str(hotnet2.__file__)), 'viz_files', VIZ_SUBNETWORKS)

    # create output directory if doesn't exist; warn if it exists and is not empty
    outdir = args.output_directory
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    if os.listdir(outdir):
        print(
            "WARNING: Output directory is not empty. Any conflicting files will be overwritten. "
            "(Ctrl-c to cancel).")

    # The display overrides do not depend on the results file: load them once.
    d_score = hnio.load_display_score_tsv(
        args.display_score_file) if args.display_score_file else None
    d_name = hnio.load_display_name_tsv(
        args.display_name_file) if args.display_name_file else dict()

    ks = set()
    output = dict(deltas=[],
                  subnetworks=dict(),
                  mutation_matrices=dict(),
                  stats=dict())
    for results_file in args.results_files:
        # Context managers close the JSON files instead of leaking the handles.
        with open(results_file) as results_in:
            results = json.load(results_in)
        ccs = results['components']
        parameters = results['parameters']

        with open(parameters['heat_file']) as heat_in:
            heat_file = json.load(heat_in)
        gene2heat = heat_file['heat']
        heat_parameters = heat_file['parameters']
        edges = hnio.load_ppi_edges(
            args.edge_file, hnio.load_index(parameters['infmat_index_file']))
        delta = format(parameters['delta'], 'g')
        output['deltas'].append(delta)

        output['subnetworks'][delta] = [
            viz.get_component_json(cc, gene2heat, edges, args.network_name,
                                   d_score, d_name)
            for cc in ccs
        ]

        # make oncoprints if heat file was generated from mutation data
        if heat_parameters.get('heat_fn') == 'load_mutation_heat':
            samples = hnio.load_samples(
                heat_parameters['sample_file']
            ) if heat_parameters['sample_file'] else None
            genes = hnio.load_genes(
                heat_parameters['gene_file']
            ) if heat_parameters['gene_file'] else None
            snvs = hnio.load_snvs(
                heat_parameters['snv_file'], genes,
                samples) if heat_parameters['snv_file'] else []
            cnas = hnio.load_cnas(
                heat_parameters['cna_file'], genes,
                samples) if heat_parameters['cna_file'] else []

            # Get the samples from the mutations directly if they weren't provided
            if not samples:
                samples = set(m.sample for m in snvs) | set(m.sample
                                                            for m in cnas)

            output['mutation_matrices'][delta] = [
                viz.get_oncoprint_json(cc, snvs, cnas, d_name) for cc in ccs
            ]

            sample_type_file = heat_parameters.get('sample_type_file')
            if sample_type_file:
                with open(sample_type_file) as f:
                    # Skip comment lines and blank lines (a blank line would
                    # otherwise break the two-column dict construction).
                    sample_to_types = dict(
                        l.rstrip().split() for l in f
                        if l.strip() and not l.startswith("#"))
                type_to_samples = dict(
                    (t, []) for t in set(sample_to_types.values()))
                # .items() works on both Python 2 and Python 3.
                for s, ty in sample_to_types.items():
                    type_to_samples[ty].append(s)
                output['sampleToTypes'] = sample_to_types
                output['typeToSamples'] = type_to_samples
            else:
                output['sampleToTypes'] = dict((s, "Cancer") for s in samples)
                output['typeToSamples'] = dict(Cancer=list(samples))

        output['stats'][delta] = results['statistics']
        ks.update(int(k) for k in results['statistics'])

    # list(...) keeps the value JSON-serializable on Python 3 as well.
    output['ks'] = list(range(min(ks), max(ks) + 1))
    with open(os.path.join(outdir, 'subnetworks.json'), 'w') as out:
        json.dump(output, out, indent=4)

    shutil.copy(subnetworks_file, os.path.join(outdir, VIZ_INDEX))