def val_df_to_sharedness(value_df, headers):
    """
    Classify each row of a value table by the samples it has values > 0 in.

    NB only for use with the phospho data

    The first column is treated as the cultured sample and every remaining
    column as a mouse sample. Rows are collected in three passes, each one
    walking the index in order:

    * 'C-only'  - first column > 0 and the remaining columns sum to 0
    * '1m-only' - exactly one value > 0 and the first column equal to 0
    * 'C+<n>m'  - first column > 0 and 2 to 5 values > 0 in total, where
      n is that total minus one; one output row per positive value

    :param value_df: A pandas dataframe with samples for columns, peps/prots for rows, with vals = affinity/abundance
    :param headers: headers for final table
    :return: a longform dataframe for plotting (the result of fxn.list_to_df on the collected rows)
    """
    # Extract every row once as a plain list. Indexing these lists is purely
    # positional, which avoids integer keys on a label-indexed Series
    # (deprecated in recent pandas) and avoids repeating the same row lookup
    # for every test below.
    records = []
    for label, row in zip(value_df.index, value_df.values.tolist()):
        records.append((label, row, [v for v in row if v > 0]))

    vals = []

    # Just in cultured
    for label, row, positives in records:
        if row[0] > 0 and sum(row[1:]) == 0:
            vals.append(['C-only', label] + positives)

    # Just in one single mouse
    for label, row, positives in records:
        if len(positives) == 1 and row[0] == 0:
            vals.append(['1m-only', label] + positives)

    # In cultured and X mice: 2-5 positive values in total, i.e. cultured
    # plus 1-4 mice. The upper bound matches the five-column phospho table.
    for n_detected in range(2, 6):
        tag = 'C+' + str(n_detected - 1) + 'm'
        for label, row, positives in records:
            if len(positives) == n_detected and row[0] > 0:
                # Long form: one output row per detected value
                for val in positives:
                    vals.append([tag, label, val])

    return fxn.list_to_df(vals, headers, False)
    #
    properties = GlobalDescriptor(peptides[gp])
    properties.charge_density(ph=7.4, amide=True)
    charge_densities[gp] = [x[0] for x in properties.descriptor]
    for val in charge_densities[gp]:
        charge_densities_long.append([gp, val])
    #
    polarities[gp] = get_peptide_values(peptides[gp], 'polarity')
    for val in polarities[gp]:
        polarities_long.append([gp, val])
    #
    gravy[gp] = get_peptide_values(peptides[gp], 'gravy')
    for val in gravy[gp]:
        gravy_long.append([gp, val])

# Group order used on the x axis of the plot below
ordr = ['Cultured', 'Mouse', 'Both']

# Convert each accumulated list of [group, value] rows into a long-form
# table via fxn.list_to_df, rebinding the same names.
eisenbergs_long = fxn.list_to_df(
    eisenbergs_long, ['Group', 'Hydrophobicity'], False)
charges_long = fxn.list_to_df(
    charges_long, ['Group', 'Charge'], False)
charge_densities_long = fxn.list_to_df(
    charge_densities_long, ['Group', 'Charge density'], False)
polarities_long = fxn.list_to_df(
    polarities_long, ['Group', 'Polarity'], False)
gravy_long = fxn.list_to_df(
    gravy_long, ['Group', 'Hydrophobicity'], False)

# Violin plot of the Eisenberg hydrophobicity values per group, saved into
# plot_dir and then closed.
sns.violinplot(x='Group', y='Hydrophobicity', data=eisenbergs_long,
               order=ordr)
plt.xlabel("")
plt.ylabel("Hydrophobicity (Eisenberg scale)")
plt.savefig(plot_dir + 'eisenberg-hydrophobicity.png',
            bbox_inches='tight',
            dpi=300)
plt.close()
                protein_dict[protein][nam] = prot_dat[nam][protein]

    samples = pep_dat.keys()
    samples.sort()

    peptide_list = []
    for peptide in peptide_dict:
        # peptide_list.append([peptide, peptide_dict[peptide]['Hunt'], peptide_dict[peptide]['Cultured'],
        peptide_list.append([
            peptide, peptide_dict[peptide]['C'], peptide_dict[peptide]['M01'],
            peptide_dict[peptide]['M04'], peptide_dict[peptide]['M07'],
            peptide_dict[peptide]['M10']
        ])

    peptides = fxn.list_to_df(peptide_list,
                              ['Peptide', 'C', 'M01', 'M04', 'M07', 'M10'],
                              True)

    protein_list = []
    for protein in protein_dict:
        protein_list.append([
            protein, protein_dict[protein]['C'], protein_dict[protein]['M01'],
            protein_dict[protein]['M04'], protein_dict[protein]['M07'],
            protein_dict[protein]['M10']
        ])

    proteins = fxn.list_to_df(protein_list,
                              ['Protein', 'C', 'M01', 'M04', 'M07', 'M10'],
                              True)

    # Plot total number of peptides detected
Example #4
0
                y_type = get_data_source(pep_samples[y])
                comparison_type = [x_type, y_type]
                comparison_type.sort()
                if comparison_type != ['control', 'control']:
                    for jacc in sampled_jacc[x][y]:
                        sampled_jacc_long.append(
                            ['-\n'.join(comparison_type), jacc])
                    whole_jacc_long.append([
                        '-\n'.join(comparison_type),
                        fxn.jaccard(peptides[pep_samples[x]],
                                    peptides[pep_samples[y]])
                    ])
                done_comparisons.append(str(x) + '-' + str(y))
                done_comparisons.append(str(y) + '-' + str(x))

    sampled_jacc_long = fxn.list_to_df(sampled_jacc_long,
                                       ['Comparison', 'Jaccard'], False)
    whole_jacc_long = fxn.list_to_df(whole_jacc_long,
                                     ['Comparison', 'Jaccard'], False)

    col_order = [
        'control-\npublished', 'control-\nculture', 'control-\nmouse',
        'published-\npublished', 'culture-\npublished', 'mouse-\npublished',
        'culture-\nmouse', 'mouse-\nmouse', 'culture-\nculture'
    ]

    fig = plt.figure(figsize=(8, 5))
    ax = fig.add_subplot(111)
    sns.violinplot(x='Comparison',
                   y='Jaccard',
                   data=sampled_jacc_long,
                   order=col_order,
Example #5
0
# Load the sample weights table: the first line holds the column names, each
# following line is one record.
# NOTE(review): the 'rU' open mode is kept as-is; it was removed in
# Python 3.11, so revisit it if this script is ported.
weights = []
with open('../Data/sample-weights.csv', 'rU') as in_file:
    for line_count, line in enumerate(in_file):
        bits = line.rstrip().split(',')
        if line_count == 0:
            headers = bits
            continue
        # The first two fields are integers; the third is kept as text
        weights.append(list(map(int, bits[:2])) + [bits[2]])

# Rename the first and third columns; the middle header is used as read from
# the file (the plot below refers to it as 'mg').
headers[0] = 'Day'
headers[2] = 'Growth type'
weights = fxn.list_to_df(weights, headers,
                         False).sort_values(by='Growth type')

# Scatter plot of weight against day, coloured by growth type
fig = plt.figure(figsize=(5, 5))
ax = fig.add_subplot(111)
sns.scatterplot(x='Day', y='mg', hue='Growth type', data=weights)

plt.xlabel("Days of growth")
plt.ylabel("Cumulative sample weight (mg)")
plt.savefig(plot_dir + 'weight-over-time.png', bbox_inches='tight', dpi=300)
plt.close()

# Read in number of peptides to check correlation with input xenograft tumor weight
with open('../Data/mouse-weights-v-peptides.csv', 'rU') as in_file:
    wvp = []
    line_count = 0
    # Go through the matches and count!
    vals = []
    positives = {}
    totals = {}
    col_order = ['Cultured', 'Mouse', 'Both']
    for growth_type in col_order:
        positive = len(
            [x for x in matches[growth_type] if matches[growth_type][x] > 0])
        total = len(matches[growth_type])
        print growth_type, positive, total, positive / total
        vals.append([growth_type, positive / total])
        positives[growth_type] = positive
        totals[growth_type] = total

    vals = fxn.list_to_df(vals, ['Type', 'Proportion'], False)

    # Finally plot
    fig = plt.figure(figsize=(4.5, 5))
    ax = fig.add_subplot(111)
    sns.barplot(data=vals,
                x='Type',
                y='Proportion',
                palette=fxn.c_m_mix_cols,
                order=col_order)
    for x in range(len(col_order)):
        plot_text = str(positives[col_order[x]]) + '/\n' + str(
            totals[col_order[x]])
        print plot_text
        ax.text(x,
                .64,
                elif best_allele == 'HLA-B0702':
                    allele_count['HLA-B*\n07:02'] += 1
                elif best_allele == 'HLA-C0702':
                    allele_count['HLA-C*\n07:02'] += 1
            else:
                allele_count['NPB'] += 1  # No predicted binder

        # Write out to nested list, to be turned into long proportions df, to be plotted as in peptide-analysis.py
        type_key = {'C': 'Cultured', 'M': 'Mouse'}
        for allele in allele_count:
            predictions.append([
                type_key[nam[0]], allele,
                allele_count[allele] / sum(allele_count.values())
            ])

    props = fxn.list_to_df(predictions, ['Type', 'Allele', 'Proportion'],
                           False)
    hla_alleles = ['HLA-A*\n02:01', 'HLA-B*\n07:02', 'HLA-C*\n07:02', 'NPB']

    # HLA allele contribution barplot
    fig = plt.figure(figsize=(4.5, 5))
    ax = fig.add_subplot(111)
    sns.barplot(data=props,
                x='Allele',
                y='Proportion',
                hue='Type',
                order=hla_alleles)

    sns.swarmplot(data=props,
                  x='Allele',
                  y='Proportion',
                  hue='Type',
                len(peptides[cell_line][growth_type])
            ])

            # Relative length distribution calculation
            for peptide in peptides[cell_line][growth_type]:
                lengths_raw[cell_line][growth_type][len(peptide)] += 1

            for i in range(8, 16):
                proportion = lengths_raw[cell_line][growth_type][i] / sum(
                    lengths_raw[cell_line][growth_type].values())
                peptide_lens_props.append(
                    [cell_line, i, proportion, nam_key[growth_type]])

        # Relative length distribution plotting
        peptide_lens_props = fxn.list_to_df(
            peptide_lens_props, ['Cell Line', 'Length', 'Proportion', 'Type'],
            False)

        fig = plt.figure(figsize=(3, 5))
        ax = fig.add_subplot(111)
        sns.barplot(x='Length',
                    y='Proportion',
                    hue='Type',
                    data=peptide_lens_props)
        plt.xlabel("Length (amino acids)")
        plt.savefig(plot_dir + cell_line + '-length-bars.png',
                    dpi=300,
                    bbox_inches='tight')
        plt.close()

        # Total overlap Venn (unsampled)