def export( self ):
    """Write every transcript's sequence to a single FASTA file.

    All ``Transcript.id`` values are read through ``db_session``; for each one
    ``Transcript( id ).get_sequence()`` is fetched and appended in FASTA format
    to ``<settings.data_folder>/transcripts.fasta``. Transcripts whose
    sequence is ``None`` are reported and skipped. A progress line is printed
    after every 100 records written.

    Returns:
        None. The result is the file on disk plus progress output on stdout.
    """
    print( "Exporting..." )

    # Get all transcript IDs
    results = db_session.query( Transcript.id ).all()

    n = 0
    target_filepath = settings.data_folder + "/transcripts.fasta"

    # The context manager closes the file even when a lookup or write raises,
    # so a failed export no longer leaks the handle.
    with open( target_filepath, "w" ) as output_handle:
        for result in results:
            transcript_id = result[ 0 ]
            seq_record = Transcript( transcript_id ).get_sequence()
            if seq_record is None:
                # format() keeps the message intact for non-string IDs too
                print( "Missing sequence for [{}]".format( transcript_id ) )
                continue

            seq_record.id = transcript_id
            SeqIO.write( seq_record, output_handle, "fasta" )

            n += 1
            if n % 100 == 0:
                print( "[{}] sequences written".format( n ) )

    print( "...Saved transcripts to [{}]".format( target_filepath ) )
def insert_db(urls):
    """Download each URL and stage one ``Transcript`` row per response.

    Every URL is fetched with ``requests.get``; the body is parsed as JSON and
    its ``'anforande'`` object is copied field by field onto a new
    ``Transcript``, which is added to ``db.session``. Keys absent from the
    JSON object yield ``None`` attributes. No commit is issued here.

    Args:
        urls: iterable of URLs to fetch.
    """
    # (Transcript attribute, key inside the "anforande" JSON object)
    field_map = (
        ("transcript_id", "anforande_id"),
        ("transcript", "anforandetext"),
        ("speaker_id", "intressent_id"),
        ("speaker_title", "talare"),
        ("party", "parti"),
        ("section", "avsnittsrubrik"),
        ("date", "dok_datum"),
    )

    for url in urls:
        response = requests.get(url)
        speech = json.loads(response.content)['anforande']

        record = Transcript()
        for attribute, json_key in field_map:
            setattr(record, attribute, speech.get(json_key))

        db.session.add(record)
def create_student( school_id):
    """Register a student (and an empty transcript) for a school.

    On POST the form fields ``first_name``, ``last_name``, ``email`` and
    ``password`` are read. If the e-mail address fails a minimal sanity check,
    or a student with that address already exists, nothing is stored. Otherwise
    a ``Student`` and its ``Transcript`` are created and committed. Every POST
    ends with a redirect to ``.home_page``.

    On GET the registration template is rendered.

    Args:
        school_id: identifier of the school; converted with ``int()``.

    Returns:
        A redirect response (POST), the rendered registration page (GET), or
        ``None`` for any other request method.
    """
    # Students must be registered through their school
    if request.method == "POST":
        first_name = request.form["first_name"]
        last_name = request.form["last_name"]
        email = request.form["email"]
        password = request.form["password"]

        # Minimal sanity check: something before the "@" and a "." somewhere
        # after it. find() is used instead of index() so that a missing
        # character rejects the address instead of raising ValueError, and the
        # two conditions are OR-ed so that failing either one rejects it.
        at_pos = email.find("@")
        if at_pos <= 0 or email.find(".", at_pos + 1) == -1:
            return redirect(url_for('.home_page'))

        # NOTE(review): the password is handed to Student exactly as received;
        # confirm that the model hashes it before it is persisted.
        if Student.query.filter_by(email=email).count() == 0:
            new_student = Student(first_name, last_name, email, password,
                                  int(school_id))
            db_session.add(new_student)
            # Commit first so that new_student.id is populated.
            db_session.commit()

            transcript = Transcript(new_student.id, int(school_id))
            db_session.add(transcript)
            db_session.commit()
            log('Student Created')

        return redirect(url_for('.home_page'))

    if request.method == "GET":
        return render_template('student_register.html')
def process_transcript_id(self, transcript_id):
    """Align all sequences of one transcript with clustalw2 and store the rows.

    The sequences returned by ``Transcript(transcript_id).get_sequences()``
    are written to ``<settings.temp_folder>/tmp.fa``, ``clustalw2`` is run on
    that file, and the alignment is then read from
    ``<settings.temp_folder>/tmp.aln`` (the file this code expects the
    clustalw2 run to have produced). One ``AlignmentEntry`` per aligned
    sequence is added to ``db_session`` and committed.

    Nothing is done (apart from a warning) when fewer than two sequences are
    available.

    Args:
        transcript_id: string identifier of the transcript to align.
    """
    print("Aligning ["+transcript_id+"]...")
    sys.stdout.flush()

    seqs_to_align = list(Transcript(transcript_id).get_sequences().values())
    if len(seqs_to_align) <= 1:
        print("Warning - not enough sequences to proceed with alignment")
        return

    temp_filepath = settings.temp_folder+"/tmp.fa"

    # output to a fasta file for clustalw alignment; the context manager
    # guarantees the file is flushed and closed before clustalw reads it,
    # and also when SeqIO.write raises.
    with open(temp_filepath, "w") as output_handle:
        SeqIO.write(seqs_to_align, output_handle, "fasta")

    # run the clustalw alignment (its stdout/stderr are not used)
    clustalw_cline = ClustalwCommandline("clustalw2", infile=temp_filepath, quicktree=True)
    clustalw_cline()

    # parse the results into the database
    entries = AlignIO.read(settings.temp_folder+"/tmp.aln", "clustal")
    for entry in entries:
        db_session.add(AlignmentEntry(transcript_id, entry.id, str(entry.seq)))

    db_session.commit()
    print("Aligned")
def preprocess_all_transcripts():
    """Preprocess every transcript file that is not stored yet.

    Walks each directory listed under ``TRANSCRIPTS_DIR_PATH`` (the last four
    characters of the directory path are used as the term label) and, for each
    file whose base name has no matching ``Transcript.file_name`` row, runs
    ``__preprocess_transcript`` on it. Progress is logged only when
    ``VERBOSE`` is truthy.
    """
    for term_dir in __list_dir(TRANSCRIPTS_DIR_PATH):
        term = term_dir[-4:]
        if VERBOSE:
            logging.info("Preprocessing term %s ..." % term)

        transcript_count = 0
        for document_path in __list_dir(term_dir):
            file_name = os.path.basename(document_path)

            # Skip files that already have a Transcript row.
            existing = Transcript.get_or_none(Transcript.file_name == file_name)
            if existing is not None:
                continue

            if VERBOSE:
                logging.info("Preprocessing document %s/%s ..." %
                             (term, file_name))

            transcript = __preprocess_transcript(document_path)

            if VERBOSE:
                petitioner_total = len(transcript.petitioner_statements())
                respondent_total = len(transcript.respondent_statements())
                logging.info(
                    "Done preprocessing document %s/%s. Parsed %s petitioner statements and %s repondent statements."
                    % (term, file_name, petitioner_total, respondent_total))

            transcript_count += 1

        if VERBOSE:
            logging.info("Done preprocessing term %s. Parsed %s transcripts." %
                         (term, transcript_count))
def _coverage_percent(part, total):
    """Return ``part / total`` as a percentage rounded to two decimals.

    An empty population (``total == 0``) yields ``0.0`` instead of raising
    ZeroDivisionError.
    """
    if not total:
        return 0.0
    return round(float(part) / float(total) * 100.0, 2)


def print_coverage_stats():
    """Log how well cases and transcripts are linked to each other.

    Two lines are logged at INFO level: the share of ``Case`` rows that have a
    non-null ``transcript``, and the share of ``Transcript`` rows that have at
    least one related case. Empty tables are reported as 0.0% coverage.
    """
    total_count = Case.select().count()
    has_transcript_count = Case.select().where(
        Case.transcript.is_null(False)).count()
    logging.info(
        "There are %s cases and %s cases with transcripts, for a coverage of %s%s."
        % (total_count, has_transcript_count,
           _coverage_percent(has_transcript_count, total_count), "%"))

    total_count = Transcript.select().count()
    # One count query per transcript; counted lazily rather than by building
    # a throwaway list.
    has_case_count = sum(1 for transcript in Transcript.select()
                         if transcript.cases.count() > 0)
    logging.info(
        "There are %s transcripts and %s transcripts with cases, for a coverage of %s%s."
        % (total_count, has_case_count,
           _coverage_percent(has_case_count, total_count), "%"))
def _attach_statements(statements, transcript, role_flag):
    """Persist ``statements`` for ``transcript`` with ``role_flag`` set to True."""
    for statement in statements:
        # Copy the paragraphs first: get_or_create() rebinds `statement`, and
        # the paragraphs must be taken from the object that was parsed.
        paragraphs = statement.temp_paragraphs[:]
        statement.transcript = transcript
        setattr(statement, role_flag, True)
        statement = statement.get_or_create()
        for paragraph in paragraphs:
            statement.add_paragraph(paragraph)


def __preprocess_transcript(file_path):
    """Parse one transcript file and store it with its statements.

    The term is taken from the name of the directory containing the file and
    the docket from the file name. Statements and red flags come from
    ``__extract_statements(file_path)``. The ``Transcript`` is stored via
    ``get_or_create()``, after which petitioner statements, respondent
    statements and red flags are attached to it.

    Args:
        file_path: path of the transcript file; its parent directory name
            must be an integer term.

    Returns:
        The object returned by ``Transcript.get_or_create()``.

    Raises:
        ValueError: if the parent directory name is not an integer.
    """
    dir_path, file_name = os.path.split(file_path)
    _, dir_name = os.path.split(dir_path)
    term = int(dir_name)

    # Raw string: the pattern is unchanged, but the backslash escapes no
    # longer rely on Python passing unknown string escapes through.
    docket_pattern = r"(\d\d\-\d+)(?:_[^\.]+)?(?:\[Reargued\])?\.pdf"
    matches = re.findall(docket_pattern, file_name)

    docket = None
    if not matches:
        logging.info("Regex didn't match file name: %s." % file_name)
    else:
        # The first match wins; more than one match is only reported.
        docket = matches[0]
        if len(matches) > 1:
            logging.info("Regex matched file name more than once: %s." %
                         file_name)

    petitioner_statements, respondent_statements, raw_text, red_flags = \
        __extract_statements(file_path)

    transcript = Transcript(raw_text=raw_text,
                            term=term,
                            docket=docket,
                            file_name=file_name)
    transcript = transcript.get_or_create()

    _attach_statements(petitioner_statements, transcript,
                       "speaker_is_petitioner")
    _attach_statements(respondent_statements, transcript,
                       "speaker_is_respondent")

    for gloss in red_flags:
        transcript.add_red_flag(gloss)

    return transcript
def export(self):
    """Write every transcript's sequence to a single FASTA file.

    All ``Transcript.id`` values are read through ``db_session``; for each one
    ``Transcript(id).get_sequence()`` is fetched and appended in FASTA format
    to ``<settings.data_folder>/transcripts.fasta``. Transcripts whose
    sequence is ``None`` are reported and skipped. A progress line is printed
    after every 100 records written.

    Returns:
        None. The result is the file on disk plus progress output on stdout.
    """
    print("Exporting...")

    from Bio import SeqIO
    from database import db_session
    from models import Transcript
    import settings

    # Get all transcript IDs
    results = db_session \
        .query(Transcript.id) \
        .all()

    n = 0
    target_filepath = settings.data_folder+"/transcripts.fasta"

    # The context manager closes the file even when a lookup or write raises.
    with open(target_filepath, "w") as output_handle:
        for result in results:
            transcript_id = result[0]
            seq_record = Transcript(transcript_id).get_sequence()

            if seq_record is None:
                # str() keeps the message intact for non-string IDs too
                print("Missing sequence for ["+str(transcript_id)+"]")
                continue

            seq_record.id = transcript_id
            SeqIO.write(seq_record, output_handle, "fasta")

            n += 1
            if n % 100 == 0:
                print("["+str(n)+"] sequences written")

    print("...Saved transcripts to ["+target_filepath+"]")
def reconciliate_cases_and_transcripts():
    """Link each case to the transcript that has the same normalised docket.

    Cases and transcripts are indexed by ``preprocess_docket(<row>.docket)``.
    For every docket present in both indexes, the case's ``transcript`` is set
    and the case is saved. Rows whose docket cannot be normalised are skipped.
    When several rows share a normalised docket, the last one read wins.
    """
    case_dockets = {}
    for case in Case.select():
        try:
            case_dockets[preprocess_docket(case.docket)] = case
        except Exception:
            # Best effort: a docket that cannot be normalised is ignored.
            # `Exception` (not a bare except) keeps Ctrl-C and SystemExit
            # working during a long run.
            continue

    transcript_dockets = {}
    for transcript in Transcript.select():
        try:
            transcript_dockets[preprocess_docket(transcript.docket)] = transcript
        except Exception:
            continue

    for docket, case in case_dockets.items():
        if docket in transcript_dockets:
            case.transcript = transcript_dockets[docket]
            case.save()
def get_normalised(self):
    """Return the normalised measurements of this transcript as TSV text.

    One line is produced per nucleotide of the transcript's sequence:
    ``<1-based position>\\t<nucleotide>\\t<measurement>\\n``. The measurement
    is ``NA`` when the stored value is ``None`` or when there is no stored
    value for that position.

    Returns:
        The assembled text (empty string for an empty sequence).

    Raises:
        IndexError: if no ``NucleotideMeasurementSet`` row exists for this
            run/transcript combination.
    """
    # Grab sequence string
    seq_str = Transcript(self.transcript_id).get_sequence_str()

    # Use the ORM to grab all the normalised stuff
    results = db_session \
        .query(NucleotideMeasurementSet) \
        .filter(
            NucleotideMeasurementSet.nucleotide_measurement_run_id==self.nucleotide_measurement_run_id,
            NucleotideMeasurementSet.transcript_id==self.transcript_id
        ) \
        .all()
    measurement_set = results[0]

    # TODO detect whether float or int and use the correct unpacker.
    # Needed for raw count values download option
    unpacked = values_str_unpack_float(measurement_set.values)
    n_values = len(unpacked)

    # build the output; lines are collected and joined once instead of
    # growing a string with += for every nucleotide
    lines = []
    for index, nucleotide in enumerate(seq_str):
        value = unpacked[index] if index < n_values else None
        measurement = "NA" if value is None else value
        lines.append(str(index + 1)+"\t"+nucleotide+"\t"+str(measurement)+"\n")

    return "".join(lines)
def create_signatures(request):
    """Generates signatures from profiles.

    Profiles are grouped by the space-joined names of their tissues; within a
    group the profile whose diet shortcut is ``'DR'`` is the experiment and
    any other profile is the control. For every complete pair a ``Signature``
    is saved, probe expressions are collected per transcript name (the part
    of the probe name before the first ``'P'``; ``RANDOM*`` probes are
    ignored), and one ``Transcript`` plus one ``Expression`` is stored for
    each transcript measured in both profiles.

    Groups lacking either profile, and transcripts missing from the control,
    are skipped rather than aborting the request.

    Returns:
        A redirect to ``/expressions/signatures/``.
    """
    # Sort profiles according to tissues
    # compare DR vs. AL.
    signatures = {}
    for profile in Profile.objects.all():
        tissues = ' '.join([tissue.name for tissue in profile.tissue.all()])
        # Single-argument print() emits the same text under Python 2 and 3.
        print("%s %s" % (tissues, profile.diet.shortcut))
        pair = signatures.setdefault(tissues, [None, None])
        if profile.diet.shortcut == 'DR':
            pair[0] = profile
        else:
            pair[1] = profile
    print(signatures)

    for tissues, pair in signatures.items():
        print("%s %s" % (tissues, pair))
        exp_profile, ctr_profile = pair
        if exp_profile is None or ctr_profile is None:
            # Without both sides there is nothing to compare; previously this
            # failed with AttributeError on None.
            print("Skipping %s: incomplete DR/control profile pair." % tissues)
            continue

        signature = Signature(name=tissues, species=exp_profile.species,
                              diet=exp_profile.diet)
        signature.save()
        for tissue in exp_profile.tissue.all():
            signature.tissues.add(tissue)

        # Collect probe expressions per transcript name for both profiles.
        for profile in pair:
            profile.transcripts = {}
            for probe in Probe.objects.filter(profile=profile):
                if probe.name.startswith('RANDOM'):
                    # RANDOM probes would only be needed for background
                    # subtraction, which is not implemented.
                    continue
                transcript_name = probe.name.split('P')[0]
                profile.transcripts.setdefault(transcript_name, []).append(
                    probe.expression)

        for transcript_name, exp_expression in exp_profile.transcripts.items():
            # If expression too low of e.g. 1/3 of probes, exclude probe.
            # RMA (background subtraction, quantile normalization, and median polishing)
            # Benjamini p-value
            ctr_expression = ctr_profile.transcripts.get(transcript_name)
            if not ctr_expression:
                # Not measured in the control profile: no ratio can be formed.
                continue

            exp = sum(exp_expression)/len(exp_expression)
            ctr = sum(ctr_expression)/len(ctr_expression)
            ratio = exp/ctr
            if ratio < 1:
                fold_change = -(1/ratio)
            else:
                fold_change = ratio

            # Effect size and p-value need more than one probe on both sides.
            if len(exp_expression) == 1 or len(ctr_expression) == 1:
                es = pvalue = None
            else:
                es = effect_size(exp_expression, ctr_expression)
                pvalue = t_two_sample(exp_expression, ctr_expression)[1]  # Calculate p-value.

            transcript = Transcript(seq_id=transcript_name, ratio=ratio,
                                    fold_change=fold_change, effect_size=es,
                                    pvalue=pvalue)
            transcript.save()
            Expression.objects.create(signature=signature,
                                      transcript=transcript,
                                      exp=exp, ctr=ctr, ratio=ratio,
                                      fold_change=fold_change,
                                      effect_size=es, pvalue=pvalue)
    print('Done')
    return redirect('/expressions/signatures/')
def get_raw(self):
    """Return the raw counts of this transcript as TSV text.

    The first line is a header: ``position``, ``sequence``, ``sum_minus``,
    ``sum_plus`` and one column per replicate lane named
    ``<minusplus_id>_B<bio_replicate_id>``, with ``_T<tech_replicate_id>``
    appended unless exactly one distinct technical replicate id is present.
    Each following line holds the 1-based position, the nucleotide and the
    integer value of every column at that position.

    Raises:
        IndexError: if no ``RawReactivities`` row exists for this
            run/transcript combination.
    """
    seq_str = Transcript(self.transcript_id).get_sequence_str()

    # Compiled (summed) minus/plus counts
    compiled = db_session \
        .query(RawReactivities) \
        .filter(
            RawReactivities.nucleotide_measurement_run_id==self.nucleotide_measurement_run_id,
            RawReactivities.transcript_id==self.transcript_id
        ) \
        .all()[0]

    cols = [
        values_str_unpack_int(compiled.minus_values),
        values_str_unpack_int(compiled.plus_values)
    ]

    # Raw per-replicate lanes, in a fixed column order
    lanes = db_session \
        .query(RawReplicateCounts) \
        .filter(
            RawReplicateCounts.nucleotide_measurement_run_id==self.nucleotide_measurement_run_id,
            RawReplicateCounts.transcript_id==self.transcript_id
        ) \
        .order_by(
            RawReplicateCounts.minusplus_id,
            RawReplicateCounts.bio_replicate_id,
            RawReplicateCounts.tech_replicate_id
        ) \
        .all()

    cols.extend([values_str_unpack_int(lane.values) for lane in lanes])

    # tech replicate notation only added for experiments with > 1 tech replicate
    distinct_tech_ids = set([lane.tech_replicate_id for lane in lanes])
    omit_tech_suffix = len(distinct_tech_ids) == 1

    headers = []
    for lane in lanes:
        label = str(lane.minusplus_id) + "_B" + str(lane.bio_replicate_id)
        if not omit_tech_suffix:
            label += "_T" + str(lane.tech_replicate_id)
        headers.append(label)

    # Header line first, then one line per position
    pieces = ["position\tsequence\tsum_minus\tsum_plus\t" + "\t".join(headers) + "\n"]
    for n in range(len(cols[0])):
        cells = [str(n + 1), seq_str[n]]
        cells.extend([str(int(col[n])) for col in cols])
        pieces.append("\t".join(cells) + "\n")

    return "".join(pieces)
def build_entries(self, experiment_ids):
    """Build the per-experiment measurement JSON for this transcript.

    For every ``NucleotideMeasurementRun`` in ``experiment_ids`` an entry with
    ``id``, ``description`` and one ``data`` element per nucleotide
    (``position``, ``nuc``, ``measurement``) is created, and measurements are
    filled in from the matching ``NucleotideMeasurementSet`` rows. An entry
    whose measurements are all ``None`` or ``0`` loses its ``data`` list and
    is flagged ``"empty": True``.

    Side effects:
        Sets ``self.empty`` (True when every entry is empty) and
        ``self.data_json`` (the JSON dump of all entries keyed by run id).

    Args:
        experiment_ids: ids of the measurement runs to include.
    """
    from models import NucleotideMeasurementRun

    # Load experiments
    experiments = db_session \
        .query(NucleotideMeasurementRun) \
        .filter(NucleotideMeasurementRun.id.in_(experiment_ids)) \
        .all()

    # Load measurements
    sequence = Transcript(self.transcript_id).get_sequence(self.strain_id)
    seq_str = str(sequence.seq)

    measurement_sets = db_session \
        .query(NucleotideMeasurementSet) \
        .filter(
            NucleotideMeasurementSet.nucleotide_measurement_run_id.in_(experiment_ids),
            NucleotideMeasurementSet.transcript_id==self.transcript_id
        ) \
        .all()

    # One entry per experiment, with a blank measurement for every nucleotide
    data = {}
    for experiment in experiments:
        cells = [
            {"position": index, "nuc": nucleotide, "measurement": None}
            for index, nucleotide in enumerate(seq_str)
        ]
        data[experiment.id] = {
            "id": experiment.id,
            "description": experiment.description,
            "data": cells
        }

    # Fill in the measurements; there may be more than one measurement set
    for measurement_set in measurement_sets:
        target_cells = data[measurement_set.nucleotide_measurement_run_id]["data"]
        values = values_str_unpack_float(measurement_set.values)
        for index, value in enumerate(values):
            target_cells[index]["measurement"] = value

    # Flag entries that carry no data; self.empty stays True only if all do
    self.empty = True
    for entry in data.values():
        has_data = any(
            cell["measurement"] is not None and cell["measurement"] != 0
            for cell in entry["data"]
        )
        if has_data:
            self.empty = False
            entry["empty"] = False
        else:
            del entry["data"]
            entry["empty"] = True

    self.data_json = json.dumps(data)
def execute_gene(self, feature_rows, strain_id):
    """Queue the gene, transcript and feature records found in GFF rows.

    The rows are processed in order and belong to one gene. A row carrying an
    ``ID=Gene`` attribute starts (or, when its type is not ``"gene"``,
    disables) the current gene. While a gene is active, rows carrying
    ``ID=Transcript`` queue a ``Transcript`` and rows carrying
    ``Parent=Transcript`` queue a ``Feature``. If ``self.filter_genes`` is set
    and the gene is not in it, processing of the remaining rows stops.

    Records are appended to ``self.genes_to_write``,
    ``self.transcripts_to_write`` and ``self.features_to_write``; genes and
    transcripts are de-duplicated through ``self.genes_seen`` and
    ``self.transcripts_seen``.

    Args:
        feature_rows: rows indexable by position; columns 0, 2, 3, 4, 6 and 8
            are read (chromosome, type, start, end, strand, attributes).
        strain_id: strain recorded on every queued ``Feature``.
    """
    gene_id = None

    for row in feature_rows:
        # Loop through annotation rows in the gff file, all related to the current gene
        chromosome_id = row[0]
        feature_type = row[2]
        start = row[3]
        end = row[4]
        direction = "forward" if row[6] == "+" else "reverse"
        attribs = row[8].strip()

        new_gene_id = self.find_attribs_value("ID=Gene", attribs)
        if new_gene_id is not None:
            # only deal with proper genes. setting gene_id to None means
            # nothing else will be processed, so non-"gene" entries and the
            # rows that follow them are skipped.
            if feature_type != "gene":
                gene_id = None
                continue

            # filter list exists, and gene is not in filter list: skip this gene
            if self.filter_genes is not None and new_gene_id not in self.filter_genes:
                return

            gene_id = new_gene_id

            # add the Gene entry - if it hasn't been already
            if gene_id not in self.genes_seen:
                gene = Gene(gene_id)
                self.genes_to_write.append(gene)
                self.genes_seen[gene_id] = gene
            continue

        if gene_id is None:
            # no legit gene is active
            continue

        transcript_id = self.find_attribs_value("ID=Transcript", attribs)
        if transcript_id is not None:
            # it's a transcript entry - add it if it hasn't been already
            transcript_id = self.ensure_unique_transcript_id(transcript_id)
            if transcript_id not in self.transcripts_seen:
                transcript = Transcript(
                    id=transcript_id, gene_id=gene_id
                )
                self.transcripts_to_write.append(transcript)
                self.transcripts_seen[transcript.id] = transcript
            continue

        # Handle transcript feature entries
        # for some reason, features for a given strain/transcript
        # combination are not always added
        parent_transcript_id = self.find_attribs_value("Parent=Transcript", attribs)
        if parent_transcript_id is None:
            # this happens for pseudogenes and TEs - which we aint interested in
            continue

        # put a filter here? some elements are not worth storing?
        self.features_to_write.append(Feature(
            transcript_id=parent_transcript_id,
            type_id=feature_type,
            strain_id=strain_id,
            chromosome_id=chromosome_id,
            start=start,
            end=end,
            direction=direction
        ))
def add_signature(request):
    """The aim is to retrieve a list of differential expressed genes for
    certain criteria (e.g. fold_change, p-value, tissue).

    On POST an uploaded tab-separated signature file (form field ``file``) is
    parsed. Descriptive information may be encoded in the file name as
    ``name=...;tissue=...;diet=...``; values chosen in the form take
    precedence for diet and name. A ``Signature`` is saved, its tissues are
    attached, and one ``Transcript`` plus one ``Expression`` is stored per
    data row. Rows that are empty, too short, or that contain non-numeric
    values or a zero ratio are skipped.

    Returns:
        A redirect to the signature list after a successful import, a
        redirect back to the add page (with an error message) when the file,
        diet, species or a tissue cannot be resolved, or the rendered form
        for non-POST requests.
    """
    form = SignatureForm(request.POST or None, request.FILES or None)
    add_url = '/expressions/signature/add/'

    if request.POST:
        # Uploaded files live in request.FILES, never in request.POST.
        upload = request.FILES.get('file')
        if upload is None:
            msg = "No file or profiles selected. Please provide either a signature "\
                  "file to upload or select profiles to derive a signature."
            messages.add_message(request, messages.ERROR, ugettext(msg))
            return redirect(add_url)
        upload.name = upload.name.replace('.txt', '')
        data = upload.read().replace('\r', '').split('\n')

        # Infer descriptive information from the filename:
        info = {}
        if upload.name.startswith('name='):
            info = dict([item.split('=') for item in upload.name.split(';')])
        if 'tissue' in info:
            # @ is unlikely to be used as filename.
            tissues = info['tissue'].replace('-', '@').replace(
                ', ', '@').replace(' and ', '@').split('@')
        else:
            tissues = request.POST.getlist('tissues')

        regimen = None
        if request.POST.get('diet'):
            regimen = Regimen.objects.get(pk=request.POST['diet'])
        elif "diet" in info:
            regimen = Regimen.objects.get(shortcut__exact=info['diet'])
        if regimen is None:
            msg = "No diet selected. Please select a diet or encode it in the file name."
            messages.add_message(request, messages.ERROR, ugettext(msg))
            return redirect(add_url)

        # Species from form:
        try:
            species = Species.objects.get(pk=request.POST['species'])
        except (KeyError, ValueError, Species.DoesNotExist) as e:
            msg = "Species not found in Denigma db. %s. Please select a species." % e
            messages.add_message(request, messages.ERROR, ugettext(msg))
            return redirect(add_url)

        # Create signature:
        signature = Signature(name=request.POST.get('name') or info.get('name'),
                              diet=regimen, species=species)
        signature.save()

        # Adding tissues:
        for tissue_ref in tissues:
            try:
                tissue = Tissue.objects.get(pk=tissue_ref)  # if it is selected from form
            except Exception:
                print("Did not found tissue by pk.")
                try:
                    # If it is inferred from file name.
                    tissue = Tissue.objects.get(name__iexact=tissue_ref)
                except Tissue.DoesNotExist as e:
                    messages.add_message(
                        request, messages.ERROR,
                        ugettext("%s: %s" % (str(e)[:-1], tissue_ref)))
                    return redirect(add_url)
            signature.tissues.add(tissue)
        # Single-argument print() emits the same text under Python 2 and 3.
        print("Tissues: %s" % (signature.tissues.all(),))

        # Map normalised column names to their index. Any column containing
        # "DR" becomes "exp" and any containing "AL" becomes "ctr"; when
        # several columns normalise to the same key, the last one wins.
        header = {}
        for index, column in enumerate(data[0].split('\t')):
            if "DR" in column:
                column = "exp"
            elif "AL" in column:
                column = "ctr"
            header[column.lower().replace('gene symbol', 'symbol')\
                                 .replace('gene_symbol', 'symbol')\
                                 .replace(' ', '_')\
                                 .replace('platform_cloneid', 'seq_id')\
                                 .replace('ensembl_gene', 'seq_id')] = index

        ratio_key = 'ratio' if 'ratio' in header else 'fold_change'

        print(len(data[1:]))
        for line in data[1:]:
            if not line:
                continue
            columns = line.split('\t')
            if len(columns) < len(header):
                continue

            # Reset per row so the diagnostic print in the handler below never
            # reports undefined names or values left over from another row.
            symbol = seq_id = fold_change = pvalue = ctr = exp = None
            try:
                seq_id = columns[header['seq_id']]
                symbol = columns[header['symbol']]
                if symbol == "None":
                    symbol = None
                ctr = float(columns[header['ctr']])
                exp = float(columns[header['exp']])

                ratio = float(columns[header[ratio_key]])  # 2**exp/2**ctr
                if ratio < 1:
                    fold_change = -(1 / ratio)
                else:
                    fold_change = ratio

                # Replicate columns (ctr*/exp* other than the plain ones):
                ctr_values = []
                exp_values = []
                for k, v in header.items():
                    if k.startswith('ctr') and k != 'ctr':
                        ctr_values.append(float(columns[v]))
                    elif k.startswith('exp') and k != 'exp':
                        exp_values.append(float(columns[v]))

                # Effect size calculation is currently disabled.
                es = None

                if exp_values != ctr_values:
                    pvalue = t_two_sample(ctr_values, exp_values)[1]
                else:
                    pvalue = 1

                transcript = Transcript(seq_id=seq_id, symbol=symbol,
                                        ratio=ratio, fold_change=fold_change,
                                        pvalue=pvalue, effect_size=es)
                transcript.save()
                Expression.objects.create(signature=signature,
                                          transcript=transcript,
                                          exp=exp, ctr=ctr, ratio=ratio,
                                          fold_change=fold_change,
                                          pvalue=pvalue, effect_size=es)
            except (ValueError, ZeroDivisionError) as e:
                # Malformed row: report it and carry on with the next one.
                print("%s %s %s %s %s %s %s" %
                      (e, symbol, seq_id, fold_change, pvalue, ctr, exp))

        msg = "Successfully integrated signature: %s" % signature.name
        msg_type = messages.SUCCESS
        messages.add_message(request, msg_type, ugettext(msg))
        # The redirect has to be returned; otherwise it is discarded and the
        # form is rendered again.
        return redirect('/expressions/signatures/')

    ctx = {'form': form, 'action': 'Add'}
    return render_to_response('expressions/signature_form.html', ctx,
                              context_instance=RequestContext(request))
def create_signatures(request):
    """Generates signatures from profiles.

    Profiles are grouped by the space-joined names of their tissues; within a
    group the profile whose diet shortcut is ``'DR'`` is the experiment and
    any other profile is the control. For every complete pair a ``Signature``
    is saved, probe expressions are collected per transcript name (the part
    of the probe name before the first ``'P'``; ``RANDOM*`` probes are
    ignored), and one ``Transcript`` plus one ``Expression`` is stored for
    each transcript measured in both profiles.

    Groups lacking either profile, and transcripts missing from the control,
    are skipped rather than aborting the request.

    Returns:
        A redirect to ``/expressions/signatures/``.
    """
    # Sort profiles according to tissues
    # compare DR vs. AL.
    signatures = {}
    for profile in Profile.objects.all():
        tissues = ' '.join([tissue.name for tissue in profile.tissue.all()])
        # Single-argument print() emits the same text under Python 2 and 3.
        print("%s %s" % (tissues, profile.diet.shortcut))
        pair = signatures.setdefault(tissues, [None, None])
        if profile.diet.shortcut == 'DR':
            pair[0] = profile
        else:
            pair[1] = profile
    print(signatures)

    for tissues, pair in signatures.items():
        print("%s %s" % (tissues, pair))
        exp_profile, ctr_profile = pair
        if exp_profile is None or ctr_profile is None:
            # Without both sides there is nothing to compare; previously this
            # failed with AttributeError on None.
            print("Skipping %s: incomplete DR/control profile pair." % tissues)
            continue

        signature = Signature(name=tissues, species=exp_profile.species,
                              diet=exp_profile.diet)
        signature.save()
        for tissue in exp_profile.tissue.all():
            signature.tissues.add(tissue)

        # Collect probe expressions per transcript name for both profiles.
        for profile in pair:
            profile.transcripts = {}
            for probe in Probe.objects.filter(profile=profile):
                if probe.name.startswith('RANDOM'):
                    # RANDOM probes would only be needed for background
                    # subtraction, which is not implemented.
                    continue
                transcript_name = probe.name.split('P')[0]
                profile.transcripts.setdefault(transcript_name, []).append(
                    probe.expression)

        for transcript_name, exp_expression in exp_profile.transcripts.items():
            # If expression too low of e.g. 1/3 of probes, exclude probe.
            # RMA (background subtraction, quantile normalization, and median polishing)
            # Benjamini p-value
            ctr_expression = ctr_profile.transcripts.get(transcript_name)
            if not ctr_expression:
                # Not measured in the control profile: no ratio can be formed.
                continue

            exp = sum(exp_expression) / len(exp_expression)
            ctr = sum(ctr_expression) / len(ctr_expression)
            ratio = exp / ctr
            if ratio < 1:
                fold_change = -(1 / ratio)
            else:
                fold_change = ratio

            # Effect size and p-value need more than one probe on both sides.
            if len(exp_expression) == 1 or len(ctr_expression) == 1:
                es = pvalue = None
            else:
                es = effect_size(exp_expression, ctr_expression)
                pvalue = t_two_sample(exp_expression, ctr_expression)[1]  # Calculate p-value.

            transcript = Transcript(seq_id=transcript_name, ratio=ratio,
                                    fold_change=fold_change, effect_size=es,
                                    pvalue=pvalue)
            transcript.save()
            Expression.objects.create(signature=signature,
                                      transcript=transcript,
                                      exp=exp, ctr=ctr, ratio=ratio,
                                      fold_change=fold_change,
                                      effect_size=es, pvalue=pvalue)
    print('Done')
    return redirect('/expressions/signatures/')
def add_signature(request):
    """The aim is to retrieve a list of differential expressed genes for
    certain criteria (e.g. fold_change, p-value, tissue).

    On POST an uploaded tab-separated signature file (form field ``file``) is
    parsed. Descriptive information may be encoded in the file name as
    ``name=...;tissue=...;diet=...``; values chosen in the form take
    precedence for diet and name. A ``Signature`` is saved, its tissues are
    attached, and one ``Transcript`` plus one ``Expression`` is stored per
    data row. Rows that are empty, too short, or that contain non-numeric
    values or a zero ratio are skipped.

    Returns:
        A redirect to the signature list after a successful import, a
        redirect back to the add page (with an error message) when the file,
        diet, species or a tissue cannot be resolved, or the rendered form
        for non-POST requests.
    """
    form = SignatureForm(request.POST or None, request.FILES or None)
    add_url = '/expressions/signature/add/'

    if request.POST:
        # Uploaded files live in request.FILES, never in request.POST.
        upload = request.FILES.get('file')
        if upload is None:
            msg = "No file or profiles selected. Please provide either a signature "\
                  "file to upload or select profiles to derive a signature."
            messages.add_message(request, messages.ERROR, ugettext(msg))
            return redirect(add_url)
        upload.name = upload.name.replace('.txt', '')
        data = upload.read().replace('\r', '').split('\n')

        # Infer descriptive information from the filename:
        info = {}
        if upload.name.startswith('name='):
            info = dict([item.split('=') for item in upload.name.split(';')])
        if 'tissue' in info:
            # @ is unlikely to be used as filename.
            tissues = info['tissue'].replace('-', '@').replace(', ', '@').replace(' and ', '@').split('@')
        else:
            tissues = request.POST.getlist('tissues')

        regimen = None
        if request.POST.get('diet'):
            regimen = Regimen.objects.get(pk=request.POST['diet'])
        elif "diet" in info:
            regimen = Regimen.objects.get(shortcut__exact=info['diet'])
        if regimen is None:
            msg = "No diet selected. Please select a diet or encode it in the file name."
            messages.add_message(request, messages.ERROR, ugettext(msg))
            return redirect(add_url)

        # Species from form:
        try:
            species = Species.objects.get(pk=request.POST['species'])
        except (KeyError, ValueError, Species.DoesNotExist) as e:
            msg = "Species not found in Denigma db. %s. Please select a species." % e
            messages.add_message(request, messages.ERROR, ugettext(msg))
            return redirect(add_url)

        # Create signature:
        signature = Signature(name=request.POST.get('name') or info.get('name'),
                              diet=regimen, species=species)
        signature.save()

        # Adding tissues:
        for tissue_ref in tissues:
            try:
                tissue = Tissue.objects.get(pk=tissue_ref)  # if it is selected from form
            except Exception:
                print("Did not found tissue by pk.")
                try:
                    tissue = Tissue.objects.get(name__iexact=tissue_ref)  # If it is inferred from file name.
                except Tissue.DoesNotExist as e:
                    messages.add_message(request, messages.ERROR, ugettext("%s: %s" % (str(e)[:-1], tissue_ref)))
                    return redirect(add_url)
            signature.tissues.add(tissue)
        # Single-argument print() emits the same text under Python 2 and 3.
        print("Tissues: %s" % (signature.tissues.all(),))

        # Map normalised column names to their index. Any column containing
        # "DR" becomes "exp" and any containing "AL" becomes "ctr"; when
        # several columns normalise to the same key, the last one wins.
        header = {}
        for index, column in enumerate(data[0].split('\t')):
            if "DR" in column:
                column = "exp"
            elif "AL" in column:
                column = "ctr"
            header[column.lower().replace('gene symbol', 'symbol')\
                                 .replace('gene_symbol', 'symbol')\
                                 .replace(' ', '_')\
                                 .replace('platform_cloneid', 'seq_id')\
                                 .replace('ensembl_gene', 'seq_id')] = index

        ratio_key = 'ratio' if 'ratio' in header else 'fold_change'

        print(len(data[1:]))
        for line in data[1:]:
            if not line:
                continue
            columns = line.split('\t')
            if len(columns) < len(header):
                continue

            # Reset per row so the diagnostic print in the handler below never
            # reports undefined names or values left over from another row.
            symbol = seq_id = fold_change = pvalue = ctr = exp = None
            try:
                seq_id = columns[header['seq_id']]
                symbol = columns[header['symbol']]
                if symbol == "None":
                    symbol = None
                ctr = float(columns[header['ctr']])
                exp = float(columns[header['exp']])

                ratio = float(columns[header[ratio_key]])  # 2**exp/2**ctr
                if ratio < 1:
                    fold_change = -(1/ratio)
                else:
                    fold_change = ratio

                # Replicate columns (ctr*/exp* other than the plain ones):
                ctr_values = []
                exp_values = []
                for k, v in header.items():
                    if k.startswith('ctr') and k != 'ctr':
                        ctr_values.append(float(columns[v]))
                    elif k.startswith('exp') and k != 'exp':
                        exp_values.append(float(columns[v]))

                # Effect size calculation is currently disabled.
                es = None

                if exp_values != ctr_values:
                    pvalue = t_two_sample(ctr_values, exp_values)[1]
                else:
                    pvalue = 1

                transcript = Transcript(seq_id=seq_id, symbol=symbol, ratio=ratio,
                                        fold_change=fold_change, pvalue=pvalue,
                                        effect_size=es)
                transcript.save()
                Expression.objects.create(
                    signature=signature,
                    transcript=transcript,
                    exp=exp,
                    ctr=ctr,
                    ratio=ratio,
                    fold_change=fold_change,
                    pvalue=pvalue,
                    effect_size=es)
            except (ValueError, ZeroDivisionError) as e:
                # Malformed row: report it and carry on with the next one.
                print("%s %s %s %s %s %s %s" %
                      (e, symbol, seq_id, fold_change, pvalue, ctr, exp))

        msg = "Successfully integrated signature: %s" % signature.name
        msg_type = messages.SUCCESS
        messages.add_message(request, msg_type, ugettext(msg))
        # The redirect has to be returned; otherwise it is discarded and the
        # form is rendered again.
        return redirect('/expressions/signatures/')

    ctx = {'form': form, 'action': 'Add'}
    return render_to_response('expressions/signature_form.html', ctx,
        context_instance=RequestContext(request))