def val_df_to_sharedness(value_df, headers):
    """
    Label each peptide/protein by how it is shared between the cultured sample and the mice.

    NB only for use with the phospho data

    A value counts as 'detected' when it is > 0. Each row is then tested against these groups:
      * 'C-only'  - detected in the first (cultured) column and the remaining columns sum to 0
      * '1m-only' - exactly one detected value in the row, and the first column equals 0
      * 'C+<k>m'  - detected in the first column and in k (>= 1) of the remaining columns
    Rows detected in several mice but not in the cultured sample match none of the groups and
    are left out of the result.

    'C-only' and '1m-only' contribute one output row per peptide/protein (label, name, then the
    detected values); 'C+<k>m' contributes one output row per detected value (label, name, value).

    :param value_df: A pandas dataframe with samples for columns, peps/prots for rows, with
        vals = affinity/abundance. The first column is treated as the cultured sample and every
        other column as a mouse sample, regardless of the column names.
    :param headers: headers for final table
    :return: a longform dataframe for plotting, built by fxn.list_to_df, with rows ordered
        'C-only', '1m-only', then 'C+1m', 'C+2m', ... (original row order within each group)
    """
    n_samples = value_df.shape[1]

    cultured_only = []
    single_mouse = []
    # One bucket per possible number of detected values for rows seen in cultured plus at least
    # one mouse; derived from the table width rather than assuming exactly four mouse columns
    shared = dict((n_hits, []) for n_hits in range(2, n_samples + 1))

    for name, row in value_df.iterrows():
        # Positional access via .iloc: plain row[0] on a label-indexed Series is deprecated
        cultured = row.iloc[0]
        detected = [val for val in row if val > 0]

        # The three checks are deliberately independent (not elif) so that each row is tested
        # against every group exactly as if the table were scanned once per group

        # Just in cultured. Builtin sum (not Series.sum) so a NaN is never treated as 'absent'
        if cultured > 0 and sum(row.iloc[1:]) == 0:
            cultured_only.append(['C-only', name] + detected)

        # Just in one single mouse
        if len(detected) == 1 and cultured == 0:
            single_mouse.append(['1m-only', name] + detected)

        # In cultured and in (number detected - 1) mice
        if cultured > 0 and len(detected) in shared:
            label = 'C+' + str(len(detected) - 1) + 'm'
            shared[len(detected)].extend([label, name, val] for val in detected)

    # Stitch the groups back together in a fixed order: C-only, 1m-only, C+1m, C+2m, ...
    vals = cultured_only + single_mouse
    for n_hits in sorted(shared):
        vals.extend(shared[n_hits])

    return fxn.list_to_df(vals, headers, False)
# properties = GlobalDescriptor(peptides[gp]) properties.charge_density(ph=7.4, amide=True) charge_densities[gp] = [x[0] for x in properties.descriptor] for val in charge_densities[gp]: charge_densities_long.append([gp, val]) # polarities[gp] = get_peptide_values(peptides[gp], 'polarity') for val in polarities[gp]: polarities_long.append([gp, val]) # gravy[gp] = get_peptide_values(peptides[gp], 'gravy') for val in gravy[gp]: gravy_long.append([gp, val]) eisenbergs_long = fxn.list_to_df(eisenbergs_long, ['Group', 'Hydrophobicity'], False) charges_long = fxn.list_to_df(charges_long, ['Group', 'Charge'], False) charge_densities_long = fxn.list_to_df(charge_densities_long, ['Group', 'Charge density'], False) polarities_long = fxn.list_to_df(polarities_long, ['Group', 'Polarity'], False) gravy_long = fxn.list_to_df(gravy_long, ['Group', 'Hydrophobicity'], False) ordr = ['Cultured', 'Mouse', 'Both'] sns.violinplot(data=eisenbergs_long, x='Group', y='Hydrophobicity', order=ordr) plt.xlabel("") plt.ylabel("Hydrophobicity (Eisenberg scale)") plt.savefig(plot_dir + 'eisenberg-hydrophobicity.png', dpi=300, bbox_inches='tight') plt.close()
protein_dict[protein][nam] = prot_dat[nam][protein] samples = pep_dat.keys() samples.sort() peptide_list = [] for peptide in peptide_dict: # peptide_list.append([peptide, peptide_dict[peptide]['Hunt'], peptide_dict[peptide]['Cultured'], peptide_list.append([ peptide, peptide_dict[peptide]['C'], peptide_dict[peptide]['M01'], peptide_dict[peptide]['M04'], peptide_dict[peptide]['M07'], peptide_dict[peptide]['M10'] ]) peptides = fxn.list_to_df(peptide_list, ['Peptide', 'C', 'M01', 'M04', 'M07', 'M10'], True) protein_list = [] for protein in protein_dict: protein_list.append([ protein, protein_dict[protein]['C'], protein_dict[protein]['M01'], protein_dict[protein]['M04'], protein_dict[protein]['M07'], protein_dict[protein]['M10'] ]) proteins = fxn.list_to_df(protein_list, ['Protein', 'C', 'M01', 'M04', 'M07', 'M10'], True) # Plot total number of peptides detected
y_type = get_data_source(pep_samples[y]) comparison_type = [x_type, y_type] comparison_type.sort() if comparison_type != ['control', 'control']: for jacc in sampled_jacc[x][y]: sampled_jacc_long.append( ['-\n'.join(comparison_type), jacc]) whole_jacc_long.append([ '-\n'.join(comparison_type), fxn.jaccard(peptides[pep_samples[x]], peptides[pep_samples[y]]) ]) done_comparisons.append(str(x) + '-' + str(y)) done_comparisons.append(str(y) + '-' + str(x)) sampled_jacc_long = fxn.list_to_df(sampled_jacc_long, ['Comparison', 'Jaccard'], False) whole_jacc_long = fxn.list_to_df(whole_jacc_long, ['Comparison', 'Jaccard'], False) col_order = [ 'control-\npublished', 'control-\nculture', 'control-\nmouse', 'published-\npublished', 'culture-\npublished', 'mouse-\npublished', 'culture-\nmouse', 'mouse-\nmouse', 'culture-\nculture' ] fig = plt.figure(figsize=(8, 5)) ax = fig.add_subplot(111) sns.violinplot(x='Comparison', y='Jaccard', data=sampled_jacc_long, order=col_order,
# Read in data weights = [] with open('../Data/sample-weights.csv', 'rU') as in_file: line_count = 0 for line in in_file: bits = line.rstrip().split(',') if line_count == 0: headers = bits else: weights.append([int(x) for x in bits[:2]] + [bits[2]]) line_count += 1 headers[0] = 'Day' headers[2] = 'Growth type' weights = fxn.list_to_df(weights, headers, False) weights = weights.sort_values(by='Growth type') fig = plt.figure(figsize=(5, 5)) ax = fig.add_subplot(111) sns.scatterplot(data=weights, x='Day', y='mg', hue='Growth type') plt.ylabel("Cumulative sample weight (mg)") plt.xlabel("Days of growth") plt.savefig(plot_dir + 'weight-over-time.png', dpi=300, bbox_inches='tight') plt.close() # Read in number of peptides to check correlation with input xenograft tumor weight with open('../Data/mouse-weights-v-peptides.csv', 'rU') as in_file: wvp = [] line_count = 0
# Go through the matches and count! vals = [] positives = {} totals = {} col_order = ['Cultured', 'Mouse', 'Both'] for growth_type in col_order: positive = len( [x for x in matches[growth_type] if matches[growth_type][x] > 0]) total = len(matches[growth_type]) print growth_type, positive, total, positive / total vals.append([growth_type, positive / total]) positives[growth_type] = positive totals[growth_type] = total vals = fxn.list_to_df(vals, ['Type', 'Proportion'], False) # Finally plot fig = plt.figure(figsize=(4.5, 5)) ax = fig.add_subplot(111) sns.barplot(data=vals, x='Type', y='Proportion', palette=fxn.c_m_mix_cols, order=col_order) for x in range(len(col_order)): plot_text = str(positives[col_order[x]]) + '/\n' + str( totals[col_order[x]]) print plot_text ax.text(x, .64,
elif best_allele == 'HLA-B0702': allele_count['HLA-B*\n07:02'] += 1 elif best_allele == 'HLA-C0702': allele_count['HLA-C*\n07:02'] += 1 else: allele_count['NPB'] += 1 # No predicted binder # Write out to nested list, to be turned into long proportions df, to be plotted as in peptide-analysis.py type_key = {'C': 'Cultured', 'M': 'Mouse'} for allele in allele_count: predictions.append([ type_key[nam[0]], allele, allele_count[allele] / sum(allele_count.values()) ]) props = fxn.list_to_df(predictions, ['Type', 'Allele', 'Proportion'], False) hla_alleles = ['HLA-A*\n02:01', 'HLA-B*\n07:02', 'HLA-C*\n07:02', 'NPB'] # HLA allele contribution barplot fig = plt.figure(figsize=(4.5, 5)) ax = fig.add_subplot(111) sns.barplot(data=props, x='Allele', y='Proportion', hue='Type', order=hla_alleles) sns.swarmplot(data=props, x='Allele', y='Proportion', hue='Type',
len(peptides[cell_line][growth_type]) ]) # Relative length distribution calculation for peptide in peptides[cell_line][growth_type]: lengths_raw[cell_line][growth_type][len(peptide)] += 1 for i in range(8, 16): proportion = lengths_raw[cell_line][growth_type][i] / sum( lengths_raw[cell_line][growth_type].values()) peptide_lens_props.append( [cell_line, i, proportion, nam_key[growth_type]]) # Relative length distribution plotting peptide_lens_props = fxn.list_to_df( peptide_lens_props, ['Cell Line', 'Length', 'Proportion', 'Type'], False) fig = plt.figure(figsize=(3, 5)) ax = fig.add_subplot(111) sns.barplot(x='Length', y='Proportion', hue='Type', data=peptide_lens_props) plt.xlabel("Length (amino acids)") plt.savefig(plot_dir + cell_line + '-length-bars.png', dpi=300, bbox_inches='tight') plt.close() # Total overlap Venn (unsampled)