def _parse_signature_header(header_line):
    """Map the normalised column names of a signature header row to indices.

    Any column whose title contains "DR" is treated as the experimental
    column ("exp") and any containing "AL" as the control column ("ctr").
    The remaining titles are lower-cased and a few known aliases are folded
    into the canonical keys ``symbol`` and ``seq_id``.  If several titles
    normalise to the same key, the right-most column wins.
    """
    header = {}
    for index, column in enumerate(header_line.split('\t')):
        if "DR" in column:
            column = "exp"
        elif "AL" in column:
            column = "ctr"
        key = column.lower()
        key = key.replace('gene symbol', 'symbol').replace('gene_symbol', 'symbol')
        key = key.replace(' ', '_')
        key = key.replace('platform_cloneid', 'seq_id').replace('ensembl_gene', 'seq_id')
        header[key] = index
    return header


def add_signature(request):
    """Create a Signature from an uploaded tab-separated expression file.

    On GET (or an empty POST) the signature form is rendered.  On POST the
    uploaded file is parsed: the first line is the header, every following
    non-empty line becomes one Transcript plus one Expression attached to the
    new Signature.  Tissue, diet and name may be given in the form or encoded
    in the file name as ``name=...;tissue=...;diet=...``.

    Lines that cannot be parsed are reported on stdout and skipped.  After a
    successful import the user is redirected to the signature list; on a
    validation problem an error message is queued and the user is sent back
    to the add form.
    """
    add_url = '/expressions/signature/add/'
    form = SignatureForm(request.POST or None, request.FILES or None)
    if request.POST:
        # Uploaded files live in request.FILES, not request.POST.
        upload = request.FILES.get('file')
        if upload is None:
            # NOTE(review): deriving a signature from selected profiles is not
            # implemented in this view, so without an upload we cannot
            # continue whether or not profiles were selected.
            msg = ("No file or profiles selected. Please provide either a signature "
                   "file to upload or select profiles to derive a signature.")
            messages.add_message(request, messages.ERROR, ugettext(msg))
            return redirect(add_url)
        upload.name = upload.name.replace('.txt', '')
        data = upload.read().replace('\r', '').split('\n')

        # Infer descriptive information from the file name.  `info` stays
        # empty when the name does not follow the key=value convention.
        info = {}
        if upload.name.startswith('name='):
            info = dict(item.split('=', 1)
                        for item in upload.name.split(';') if '=' in item)

        if 'tissue' in info:
            # '@' is unlikely to be used in a file name, so it serves as the
            # common separator for "-", ", " and " and ".
            tissues = info['tissue'].replace('-', '@').replace(
                ', ', '@').replace(' and ', '@').split('@')
        else:
            tissues = request.POST.getlist('tissues')

        # NOTE(review): the signature is created with diet=None when neither
        # the form nor the file name names a diet -- confirm the model
        # allows that.
        regimen = None
        if request.POST.get('diet'):
            regimen = Regimen.objects.get(pk=request.POST['diet'])
        elif 'diet' in info:
            regimen = Regimen.objects.get(shortcut__exact=info['diet'])

        # Species from form.  A missing field, a non-numeric pk and an
        # unknown pk are all reported to the user the same way.
        try:
            species = Species.objects.get(pk=request.POST['species'])
        except (Species.DoesNotExist, ValueError, KeyError) as e:
            msg = "Species not found in Denigma db. %s. Please select a species." % e
            messages.add_message(request, messages.ERROR, ugettext(msg))
            return redirect(add_url)

        # Resolve all tissues before anything is written, so that a failed
        # lookup does not leave a half-built signature behind.
        tissue_objects = []
        for tissue in tissues:
            try:
                # A pk if it was selected in the form.
                tissue_object = Tissue.objects.get(pk=tissue)
            except (Tissue.DoesNotExist, ValueError):
                print("Did not found tissue by pk.")
                try:
                    # A name if it was inferred from the file name.
                    tissue_object = Tissue.objects.get(name__iexact=tissue)
                except Tissue.DoesNotExist as e:
                    # str(e)[:-1] drops the last character of the exception
                    # text before the tissue name is appended.
                    messages.add_message(
                        request, messages.ERROR,
                        ugettext("%s: %s" % (str(e)[:-1], tissue)))
                    return redirect(add_url)
            tissue_objects.append(tissue_object)

        # Create signature:
        signature = Signature(name=request.POST['name'] or info['name'],
                              diet=regimen, species=species)
        signature.save()
        for tissue_object in tissue_objects:
            signature.tissues.add(tissue_object)
        print("Tissues: %s" % (signature.tissues.all(),))

        header = _parse_signature_header(data[0])
        # The ratio is read from a "ratio" column when present, otherwise
        # from "fold_change".
        ratio_key = 'ratio' if 'ratio' in header else 'fold_change'

        print(len(data[1:]))
        for line in data[1:]:
            if not line:
                continue
            columns = line.split('\t')
            if len(columns) < len(header):
                continue
            try:
                seq_id = columns[header['seq_id']]
                symbol = columns[header['symbol']]
                if symbol == "None":
                    symbol = None
                ctr = float(columns[header['ctr']])
                exp = float(columns[header['exp']])

                ratio = float(columns[header[ratio_key]])
                # Ratios below 1 are expressed as negative fold changes.
                fold_change = -(1 / ratio) if ratio < 1 else ratio

                # Replicate columns (e.g. "ctr1", "exp_2") feed the effect
                # size and the p-value; the summary columns are excluded.
                ctr_values = []
                exp_values = []
                for k, v in header.items():
                    if k.startswith('ctr') and k != 'ctr':
                        ctr_values.append(float(columns[v]))
                    elif k.startswith('exp') and k != 'exp':
                        exp_values.append(float(columns[v]))

                # `es` must always be bound because it is passed to both
                # model constructors below.
                if exp_values and exp_values != ctr_values:
                    es = effect_size(exp_values, ctr_values)
                else:
                    es = None

                if exp_values != ctr_values:
                    pvalue = t_two_sample(ctr_values, exp_values)[1]
                else:
                    pvalue = 1

                transcript = Transcript(seq_id=seq_id,
                                        symbol=symbol,
                                        ratio=ratio,
                                        fold_change=fold_change,
                                        pvalue=pvalue,
                                        effect_size=es)
                transcript.save()
                Expression.objects.create(signature=signature,
                                          transcript=transcript,
                                          exp=exp,
                                          ctr=ctr,
                                          ratio=ratio,
                                          fold_change=fold_change,
                                          pvalue=pvalue,
                                          effect_size=es)
            except (ValueError, ZeroDivisionError, IndexError) as e:
                # Only names that are certainly bound are reported; the
                # parsed values may not exist yet when the error occurs.
                print("Skipped line %r: %s" % (line, e))

        msg = "Successfully integrated signature: %s" % signature.name
        messages.add_message(request, messages.SUCCESS, ugettext(msg))
        return redirect('/expressions/signatures/')

    ctx = {'form': form, 'action': 'Add'}
    return render_to_response('expressions/signature_form.html', ctx,
                              context_instance=RequestContext(request))
def create_signatures(request):
    """Generate one signature per tissue combination from stored profiles.

    Profiles are grouped by the space-joined names of their tissues.  Within
    a group the profile whose diet shortcut is 'DR' is the experimental one
    and any other profile is the control (if several profiles compete for a
    slot, the last one iterated wins).  Groups lacking either profile are
    skipped.

    For every transcript measured in both profiles the mean expression,
    ratio and signed fold change are stored as a Transcript and an
    Expression; effect size and p-value are only computed when both sides
    have more than one probe.  Finally the user is redirected to the
    signature list.
    """
    # Slot 0: DR profile, slot 1: profile with any other diet.
    signatures = {}
    for profile in Profile.objects.all():
        tissues = ' '.join([tissue.name for tissue in profile.tissue.all()])
        print("%s %s" % (tissues, profile.diet.shortcut))
        pair = signatures.setdefault(tissues, [None, None])
        if profile.diet.shortcut == 'DR':
            pair[0] = profile
        else:
            pair[1] = profile
    print(signatures)

    for tissues, (exp_profile, ctr_profile) in signatures.items():
        print("%s %s" % (tissues, [exp_profile, ctr_profile]))
        # Without both an experimental and a control profile there is
        # nothing to compare.
        if exp_profile is None or ctr_profile is None:
            continue

        signature = Signature(name=tissues,
                              species=exp_profile.species,
                              diet=exp_profile.diet)
        signature.save()
        for tissue in exp_profile.tissue.all():
            signature.tissues.add(tissue)

        # Collect the probe expressions of each profile per transcript.  The
        # transcript name is the part of the probe name before the first 'P';
        # probes named RANDOM... are ignored.
        for profile in (exp_profile, ctr_profile):
            profile.transcripts = {}
            for probe in Probe.objects.filter(profile=profile):
                if probe.name.startswith('RANDOM'):
                    continue
                transcript_name = probe.name.split('P')[0]
                profile.transcripts.setdefault(
                    transcript_name, []).append(probe.expression)

        for transcript_name, exp_expression in exp_profile.transcripts.items():
            ctr_expression = ctr_profile.transcripts.get(transcript_name)
            if not ctr_expression:
                # Transcript was not measured in the control profile.
                continue

            # float() forces true division even for integer inputs on
            # Python 2.
            exp = float(sum(exp_expression)) / len(exp_expression)
            ctr = float(sum(ctr_expression)) / len(ctr_expression)
            if not exp or not ctr:
                # A zero mean makes the ratio or its inverse undefined.
                continue

            ratio = exp / ctr
            # Ratios below 1 are expressed as negative fold changes.
            fold_change = -(1 / ratio) if ratio < 1 else ratio

            if len(exp_expression) == 1 or len(ctr_expression) == 1:
                es = pvalue = None
            else:
                es = effect_size(exp_expression, ctr_expression)
                pvalue = t_two_sample(exp_expression, ctr_expression)[1]

            transcript = Transcript(seq_id=transcript_name,
                                    ratio=ratio,
                                    fold_change=fold_change,
                                    effect_size=es,
                                    pvalue=pvalue)
            transcript.save()
            Expression.objects.create(signature=signature,
                                      transcript=transcript,
                                      exp=exp,
                                      ctr=ctr,
                                      ratio=ratio,
                                      fold_change=fold_change,
                                      effect_size=es,
                                      pvalue=pvalue)
    print('Done')
    return redirect('/expressions/signatures/')
def recalculate(self):
    """Collapse the values in ``self.exp`` / ``self.ctr`` into summary fields.

    On entry ``self.exp`` and ``self.ctr`` are passed to ``t_two_sample`` and
    ``mean`` (presumably sequences of replicate measurements -- confirm
    against the caller).  On exit they hold the respective means,
    ``self.ratio`` their quotient and ``self.pvalue`` the test's p-value.
    """
    # The p-value must be computed first, while exp/ctr still hold the raw
    # values.  Every other call site in this file takes element [1] of the
    # t_two_sample result as the p-value, so do the same here instead of
    # storing the whole result.
    self.pvalue = t_two_sample(self.exp, self.ctr)[1]
    self.exp = mean(self.exp)
    self.ctr = mean(self.ctr)
    self.ratio = self.exp / self.ctr
def _parse_signature_header(header_line):
    """Map the normalised column names of a signature header row to indices.

    Any column whose title contains "DR" is treated as the experimental
    column ("exp") and any containing "AL" as the control column ("ctr").
    The remaining titles are lower-cased and a few known aliases are folded
    into the canonical keys ``symbol`` and ``seq_id``.  If several titles
    normalise to the same key, the right-most column wins.
    """
    header = {}
    for index, column in enumerate(header_line.split('\t')):
        if "DR" in column:
            column = "exp"
        elif "AL" in column:
            column = "ctr"
        key = column.lower()
        key = key.replace('gene symbol', 'symbol').replace('gene_symbol', 'symbol')
        key = key.replace(' ', '_')
        key = key.replace('platform_cloneid', 'seq_id').replace('ensembl_gene', 'seq_id')
        header[key] = index
    return header


def add_signature(request):
    """Create a Signature from an uploaded tab-separated expression file.

    On GET (or an empty POST) the signature form is rendered.  On POST the
    uploaded file is parsed: the first line is the header, every following
    non-empty line becomes one Transcript plus one Expression attached to the
    new Signature.  Tissue, diet and name may be given in the form or encoded
    in the file name as ``name=...;tissue=...;diet=...``.

    Lines that cannot be parsed are reported on stdout and skipped.  After a
    successful import the user is redirected to the signature list; on a
    validation problem an error message is queued and the user is sent back
    to the add form.
    """
    add_url = '/expressions/signature/add/'
    form = SignatureForm(request.POST or None, request.FILES or None)
    if request.POST:
        # Uploaded files live in request.FILES, not request.POST.
        upload = request.FILES.get('file')
        if upload is None:
            # NOTE(review): deriving a signature from selected profiles is not
            # implemented in this view, so without an upload we cannot
            # continue whether or not profiles were selected.
            msg = ("No file or profiles selected. Please provide either a signature "
                   "file to upload or select profiles to derive a signature.")
            messages.add_message(request, messages.ERROR, ugettext(msg))
            return redirect(add_url)
        upload.name = upload.name.replace('.txt', '')
        data = upload.read().replace('\r', '').split('\n')

        # Infer descriptive information from the file name.  `info` stays
        # empty when the name does not follow the key=value convention.
        info = {}
        if upload.name.startswith('name='):
            info = dict(item.split('=', 1)
                        for item in upload.name.split(';') if '=' in item)

        if 'tissue' in info:
            # '@' is unlikely to be used in a file name, so it serves as the
            # common separator for "-", ", " and " and ".
            tissues = info['tissue'].replace('-', '@').replace(
                ', ', '@').replace(' and ', '@').split('@')
        else:
            tissues = request.POST.getlist('tissues')

        # NOTE(review): the signature is created with diet=None when neither
        # the form nor the file name names a diet -- confirm the model
        # allows that.
        regimen = None
        if request.POST.get('diet'):
            regimen = Regimen.objects.get(pk=request.POST['diet'])
        elif 'diet' in info:
            regimen = Regimen.objects.get(shortcut__exact=info['diet'])

        # Species from form.  A missing field, a non-numeric pk and an
        # unknown pk are all reported to the user the same way.
        try:
            species = Species.objects.get(pk=request.POST['species'])
        except (Species.DoesNotExist, ValueError, KeyError) as e:
            msg = "Species not found in Denigma db. %s. Please select a species." % e
            messages.add_message(request, messages.ERROR, ugettext(msg))
            return redirect(add_url)

        # Resolve all tissues before anything is written, so that a failed
        # lookup does not leave a half-built signature behind.
        tissue_objects = []
        for tissue in tissues:
            try:
                # A pk if it was selected in the form.
                tissue_object = Tissue.objects.get(pk=tissue)
            except (Tissue.DoesNotExist, ValueError):
                print("Did not found tissue by pk.")
                try:
                    # A name if it was inferred from the file name.
                    tissue_object = Tissue.objects.get(name__iexact=tissue)
                except Tissue.DoesNotExist as e:
                    # str(e)[:-1] drops the last character of the exception
                    # text before the tissue name is appended.
                    messages.add_message(
                        request, messages.ERROR,
                        ugettext("%s: %s" % (str(e)[:-1], tissue)))
                    return redirect(add_url)
            tissue_objects.append(tissue_object)

        # Create signature:
        signature = Signature(name=request.POST['name'] or info['name'],
                              diet=regimen, species=species)
        signature.save()
        for tissue_object in tissue_objects:
            signature.tissues.add(tissue_object)
        print("Tissues: %s" % (signature.tissues.all(),))

        header = _parse_signature_header(data[0])
        # The ratio is read from a "ratio" column when present, otherwise
        # from "fold_change".
        ratio_key = 'ratio' if 'ratio' in header else 'fold_change'

        print(len(data[1:]))
        for line in data[1:]:
            if not line:
                continue
            columns = line.split('\t')
            if len(columns) < len(header):
                continue
            try:
                seq_id = columns[header['seq_id']]
                symbol = columns[header['symbol']]
                if symbol == "None":
                    symbol = None
                ctr = float(columns[header['ctr']])
                exp = float(columns[header['exp']])

                ratio = float(columns[header[ratio_key]])
                # Ratios below 1 are expressed as negative fold changes.
                fold_change = -(1 / ratio) if ratio < 1 else ratio

                # Replicate columns (e.g. "ctr1", "exp_2") feed the effect
                # size and the p-value; the summary columns are excluded.
                ctr_values = []
                exp_values = []
                for k, v in header.items():
                    if k.startswith('ctr') and k != 'ctr':
                        ctr_values.append(float(columns[v]))
                    elif k.startswith('exp') and k != 'exp':
                        exp_values.append(float(columns[v]))

                # `es` must always be bound because it is passed to both
                # model constructors below.
                if exp_values and exp_values != ctr_values:
                    es = effect_size(exp_values, ctr_values)
                else:
                    es = None

                if exp_values != ctr_values:
                    pvalue = t_two_sample(ctr_values, exp_values)[1]
                else:
                    pvalue = 1

                transcript = Transcript(seq_id=seq_id,
                                        symbol=symbol,
                                        ratio=ratio,
                                        fold_change=fold_change,
                                        pvalue=pvalue,
                                        effect_size=es)
                transcript.save()
                Expression.objects.create(signature=signature,
                                          transcript=transcript,
                                          exp=exp,
                                          ctr=ctr,
                                          ratio=ratio,
                                          fold_change=fold_change,
                                          pvalue=pvalue,
                                          effect_size=es)
            except (ValueError, ZeroDivisionError, IndexError) as e:
                # Only names that are certainly bound are reported; the
                # parsed values may not exist yet when the error occurs.
                print("Skipped line %r: %s" % (line, e))

        msg = "Successfully integrated signature: %s" % signature.name
        messages.add_message(request, messages.SUCCESS, ugettext(msg))
        return redirect('/expressions/signatures/')

    ctx = {'form': form, 'action': 'Add'}
    return render_to_response('expressions/signature_form.html', ctx,
                              context_instance=RequestContext(request))
def create_signatures(request):
    """Generate one signature per tissue combination from stored profiles.

    Profiles are grouped by the space-joined names of their tissues.  Within
    a group the profile whose diet shortcut is 'DR' is the experimental one
    and any other profile is the control (if several profiles compete for a
    slot, the last one iterated wins).  Groups lacking either profile are
    skipped.

    For every transcript measured in both profiles the mean expression,
    ratio and signed fold change are stored as a Transcript and an
    Expression; effect size and p-value are only computed when both sides
    have more than one probe.  Finally the user is redirected to the
    signature list.
    """
    # Slot 0: DR profile, slot 1: profile with any other diet.
    signatures = {}
    for profile in Profile.objects.all():
        tissues = ' '.join([tissue.name for tissue in profile.tissue.all()])
        print("%s %s" % (tissues, profile.diet.shortcut))
        pair = signatures.setdefault(tissues, [None, None])
        if profile.diet.shortcut == 'DR':
            pair[0] = profile
        else:
            pair[1] = profile
    print(signatures)

    for tissues, (exp_profile, ctr_profile) in signatures.items():
        print("%s %s" % (tissues, [exp_profile, ctr_profile]))
        # Without both an experimental and a control profile there is
        # nothing to compare.
        if exp_profile is None or ctr_profile is None:
            continue

        signature = Signature(name=tissues,
                              species=exp_profile.species,
                              diet=exp_profile.diet)
        signature.save()
        for tissue in exp_profile.tissue.all():
            signature.tissues.add(tissue)

        # Collect the probe expressions of each profile per transcript.  The
        # transcript name is the part of the probe name before the first 'P';
        # probes named RANDOM... are ignored.
        for profile in (exp_profile, ctr_profile):
            profile.transcripts = {}
            for probe in Probe.objects.filter(profile=profile):
                if probe.name.startswith('RANDOM'):
                    continue
                transcript_name = probe.name.split('P')[0]
                profile.transcripts.setdefault(
                    transcript_name, []).append(probe.expression)

        for transcript_name, exp_expression in exp_profile.transcripts.items():
            ctr_expression = ctr_profile.transcripts.get(transcript_name)
            if not ctr_expression:
                # Transcript was not measured in the control profile.
                continue

            # float() forces true division even for integer inputs on
            # Python 2.
            exp = float(sum(exp_expression)) / len(exp_expression)
            ctr = float(sum(ctr_expression)) / len(ctr_expression)
            if not exp or not ctr:
                # A zero mean makes the ratio or its inverse undefined.
                continue

            ratio = exp / ctr
            # Ratios below 1 are expressed as negative fold changes.
            fold_change = -(1 / ratio) if ratio < 1 else ratio

            if len(exp_expression) == 1 or len(ctr_expression) == 1:
                es = pvalue = None
            else:
                es = effect_size(exp_expression, ctr_expression)
                pvalue = t_two_sample(exp_expression, ctr_expression)[1]

            transcript = Transcript(seq_id=transcript_name,
                                    ratio=ratio,
                                    fold_change=fold_change,
                                    effect_size=es,
                                    pvalue=pvalue)
            transcript.save()
            Expression.objects.create(signature=signature,
                                      transcript=transcript,
                                      exp=exp,
                                      ctr=ctr,
                                      ratio=ratio,
                                      fold_change=fold_change,
                                      effect_size=es,
                                      pvalue=pvalue)
    print('Done')
    return redirect('/expressions/signatures/')
def recalculate(self):
    """Collapse the values in ``self.exp`` / ``self.ctr`` into summary fields.

    On entry ``self.exp`` and ``self.ctr`` are passed to ``t_two_sample`` and
    ``mean`` (presumably sequences of replicate measurements -- confirm
    against the caller).  On exit they hold the respective means,
    ``self.ratio`` their quotient and ``self.pvalue`` the test's p-value.
    """
    # The p-value must be computed first, while exp/ctr still hold the raw
    # values.  Every other call site in this file takes element [1] of the
    # t_two_sample result as the p-value, so do the same here instead of
    # storing the whole result.
    self.pvalue = t_two_sample(self.exp, self.ctr)[1]
    self.exp = mean(self.exp)
    self.ctr = mean(self.ctr)
    self.ratio = self.exp / self.ctr