def site_annotation_process(wiz, form):
    """Post-process the site annotation step.

    Only matched sites are annotated. For each of them the TF function, the
    TF type, the quantitative value (only when the curation has quantitative
    data) and the checked experimental techniques are copied from the form's
    cleaned data onto the site object. The updated list of sites is then
    written back to the session.

    Raises:
        AssertionError: if a matched site has no ``<key>_site`` entry in the
            form's cleaned data.
    """
    session = wiz.request.session
    sites = session_utils.get(session, 'sites')
    techniques = session_utils.get(session, 'techniques')
    has_qdata = session_utils.get(session, 'has_quantitative_data')
    cd = form.cleaned_data

    for site in sites:
        i = site.key
        if not site.is_matched():
            continue

        # Make sure all matched sites are in the annotation form. This is
        # raised explicitly instead of using an ``assert`` statement so that
        # the check is not stripped when Python runs with -O.
        if '%d_site' % i not in cd:
            raise AssertionError("Inconsistent site annotation form")

        # Quantitative value; an empty value is stored as None.
        if has_qdata:
            q = cd['%d_qval' % i]
            site.set_qval(float(q) if q else None)

        # TF function
        site.set_TF_function(cd['%d_TF_function' % i])
        # TF type
        site.set_TF_type(cd['%d_TF_type' % i])

        # Experimental techniques: start from scratch and add the checked
        # ones. Form fields are indexed by the technique's position.
        site.clear_techniques()
        for j, t in enumerate(techniques):
            if cd['%d_technique_%d' % (i, j)]:
                site.add_technique(t)

    # Save sites again
    session_utils.put(session, 'sites', sites)
def site_entry_get_form(wiz, form):
    """Construct the form for site entry step.

    If the session holds a previously curated paper, the ``site_type`` field
    is prepopulated from one of that curation's site instances and the
    session entry is then reset to None. Outside high-throughput mode the
    peak-related fields are removed from the form; in high-throughput mode
    the ``peak_techniques`` choices are filled from the techniques stored in
    the session.

    Returns:
        The same form object, modified in place.
    """
    session = wiz.request.session
    c = session_utils.get(session, 'previously_curated_paper')
    # if paper is previously curated, prepopulate fields
    if c:
        # pick any curation_site_instance object for this curation
        try:
            curation_site_instance = models.Curation_SiteInstance.objects.filter(
                curation=c).all()[:1].get()
        except models.Curation_SiteInstance.DoesNotExist:
            # The curation has no site instances: nothing to prepopulate.
            # Only this expected failure is ignored; a bare ``except`` would
            # also hide programming errors and KeyboardInterrupt.
            pass
        else:
            form.fields['site_type'].initial = curation_site_instance.site_type
        # Delete session data, so that if the user changes any field and then
        # comes back, the last entered data is shown instead of the
        # prepopulated data.
        session_utils.put(session, "previously_curated_paper", None)

    if not session_utils.get(session, 'high_throughput_curation'):
        # not high-throughput mode: delete related fields
        del form.fields['peaks']
        del form.fields['assay_conditions']
        del form.fields['method_notes']
        del form.fields['peak_techniques']
    else:
        # high-throughput mode: populate peak techniques
        techniques = session_utils.get(session, 'techniques')
        choices = [(t.technique_id, t.name) for t in techniques]
        form.fields['peak_techniques'].choices = choices
    return form
def site_annotation_process(wiz, form):
    """Post-process the site annotation step.

    Only matched sites are annotated. For each of them the TF function, the
    TF type, the quantitative value (only when the curation has quantitative
    data) and the checked experimental techniques are copied from the form's
    cleaned data onto the site object. The updated list of sites is then
    written back to the session.

    Raises:
        AssertionError: if a matched site has no ``<key>_site`` entry in the
            form's cleaned data.
    """
    session = wiz.request.session
    sites = session_utils.get(session, 'sites')
    techniques = session_utils.get(session, 'techniques')
    has_qdata = session_utils.get(session, 'has_quantitative_data')
    cd = form.cleaned_data

    for site in sites:
        i = site.key
        if not site.is_matched():
            continue

        # Make sure all matched sites are in the annotation form. An explicit
        # raise is used because an ``assert`` statement is removed when
        # Python runs with -O, which would silently disable the check.
        if '%d_site' % i not in cd:
            raise AssertionError("Inconsistent site annotation form")

        # Quantitative value; an empty value is stored as None.
        if has_qdata:
            q = cd['%d_qval' % i]
            site.set_qval(float(q) if q else None)

        # TF function
        site.set_TF_function(cd['%d_TF_function' % i])
        # TF type
        site.set_TF_type(cd['%d_TF_type' % i])

        # Experimental techniques: reset, then add each technique whose
        # checkbox (indexed by position in the technique list) is checked.
        site.clear_techniques()
        for j, t in enumerate(techniques):
            if cd['%d_technique_%d' % (i, j)]:
                site.add_technique(t)

    # Save sites again
    session_utils.put(session, 'sites', sites)
def high_throughput_curation(request):
    """Entry point for high-throughput curation.

    Although ChIP and other high-throughput methodologies can be checked in
    the regular submission mode, curators submitting data that is primarily
    based on high-throughput binding assays (e.g. ChIP-seq genomic-SELEX,
    etc.) are encouraged to use this portal instead.

    The first few steps are identical to the regular submission portal. The
    site-entry step asks for two types of data: sites and peaks. As in the
    regular portal, sites can be motif-associated, non-motif-associated or
    variable-motif-associated (e.g. variable spacing, inverting, anything
    that is not gapless alignment), entered in sequence-based or
    coordinate-based mode. Peak data (most likely in coordinate mode) is
    entered below the site box.
    """
    # Flag the session: this IS a high-throughput submission.
    session_utils.put(request.session, 'high_throughput_curation', True)

    wizard_steps = [
        PublicationForm,
        GenomeForm,
        TechniquesForm,
        SiteEntryForm,
        SiteExactMatchForm,
        SiteSoftMatchForm,
        SiteAnnotationForm,
        GeneRegulationForm,
        CurationReviewForm,
    ]
    step_conditions = {'5': inexact_match_form_condition}
    wizard_view = CurationWizard.as_view(wizard_steps,
                                         condition_dict=step_conditions)
    return wizard_view(request)
def site_entry_get_form(wiz, form):
    """Construct the form for site entry step.

    If the session holds a previously curated paper, the ``site_type`` field
    is prepopulated from one of that curation's site instances and the
    session entry is then reset to None. Outside high-throughput mode the
    peak-related fields are removed from the form; in high-throughput mode
    the ``peak_techniques`` choices are filled from the techniques stored in
    the session.

    Returns:
        The same form object, modified in place.
    """
    session = wiz.request.session
    c = session_utils.get(session, 'previously_curated_paper')
    # if paper is previously curated, prepopulate fields
    if c:
        # pick any curation_site_instance object for this curation
        try:
            curation_site_instance = models.Curation_SiteInstance.objects.filter(
                curation=c).all()[:1].get()
        except models.Curation_SiteInstance.DoesNotExist:
            # No site instance for this curation, so there is nothing to
            # prepopulate. Catching only this case (instead of a bare
            # ``except``) keeps real errors and KeyboardInterrupt visible.
            pass
        else:
            form.fields['site_type'].initial = curation_site_instance.site_type
        # Delete session data, so that if the user changes any field and then
        # comes back, the last entered data is shown instead of the
        # prepopulated data.
        session_utils.put(session, "previously_curated_paper", None)

    if not session_utils.get(session, 'high_throughput_curation'):
        # not high-throughput mode: delete related fields
        del form.fields['peaks']
        del form.fields['assay_conditions']
        del form.fields['method_notes']
        del form.fields['peak_techniques']
    else:
        # high-throughput mode: populate peak techniques
        techniques = session_utils.get(session, 'techniques')
        choices = [(t.technique_id, t.name) for t in techniques]
        form.fields['peak_techniques'].choices = choices
    return form
def high_throughput_curation(request):
    """Entry point for high-throughput curation.

    Curators may check ChIP and other high-throughput methodologies in the
    regular submission mode, but when the data is primarily based on
    high-throughput binding assays (e.g. ChIP-seq genomic-SELEX, etc.) they
    are encouraged to use this high-throughput submission portal.

    The first few steps match the regular submission portal. In the
    site-entry step two types of data are asked for: sites and peaks. Sites
    can be motif-associated, non-motif-associated or
    variable-motif-associated (e.g. variable spacing, inverting, anything
    that is not gapless alignment) and can be entered in sequence-based or
    coordinate-based modes. Below the site box, curators can enter peak data
    (most likely in coordinate mode).
    """
    # Mark the session as a high-throughput submission.
    session_utils.put(request.session, 'high_throughput_curation', True)

    form_list = [
        PublicationForm,
        GenomeForm,
        TechniquesForm,
        SiteEntryForm,
        SiteExactMatchForm,
        SiteSoftMatchForm,
        SiteAnnotationForm,
        GeneRegulationForm,
        CurationReviewForm,
    ]
    conditions = {'5': inexact_match_form_condition}
    handler = CurationWizard.as_view(form_list, condition_dict=conditions)
    return handler(request)
def process_preview(self, request, form, context):
    """Prepare the Publication object shown on the preview page.

    For a PubMed submission the publication is looked up by pmid and, when
    it is not found, built from the fetched PubMed record. For a non-PubMed
    submission the record is assembled from the form fields. The resulting
    object is placed in ``context["pub"]`` and in the session under
    "publication".
    """
    cd = form.cleaned_data
    if 'pmid' not in cd:
        # Non-pubmed publication: build the record manually from the form.
        pubrec = {
            'Title': cd['title'],
            'AuthorList': cd['authors'].split(','),
            'FullJournalName': cd['journal'],
            'PubDate': cd['publication_date'],
            'Volume': cd['volume'],
            'Issue': cd['issue'],
            'Pages': cd['pages'],
        }
        p = create_object.make_pub(pubrec, cd)
    else:
        # The pmid has already been validated in
        # pubform.PubmedPublicationForm.clean_pmid, so it is safe to use.
        pmid = cd['pmid']
        try:
            # Reuse the publication if it is already in the database.
            p = Publication.objects.get(pmid=pmid)
        except Publication.DoesNotExist:
            # Otherwise build it from the PubMed record.
            pubrec = base.bioutils.get_pubmed(cd['pmid'])
            p = create_object.make_pub(pubrec, cd)

    # The Publication object now exists, BUT it may not be in the DB yet.
    # Hand it over for review.
    context["pub"] = p
    session_utils.put(request.session, "publication", p)
def publication_process(wiz, form):
    """Post-process the paper selection step.

    Stores the selected publication id in the session. If the curator marked
    the paper as having no data, the paper is annotated, flagged as complete
    and saved, and the function returns early. Otherwise the first existing
    curation of the publication (or None) is stored in the session as
    "previously_curated_paper".
    """
    session = wiz.request.session
    pubid = form.cleaned_data['pub']
    session_utils.put(session, 'publication', int(pubid))

    if form.cleaned_data["no_data"]:
        # Mark the paper as having no data and stop here.
        paper = models.Publication.objects.get(publication_id=pubid)
        paper.submission_notes += " \nPaper has no TF-binding site data."
        paper.curation_complete = True
        paper.save()
        session_utils.put(session, "paper_contains_no_data", True)
        return

    # If the paper was curated before, remember one of its curations so that
    # genome and TF information can be prepopulated in later steps.
    publication = models.Publication.objects.get(publication_id=pubid)
    curations = models.Curation.objects.filter(publication=publication)
    previous = curations[0] if curations.count() >= 1 else None
    session_utils.put(session, "previously_curated_paper", previous)
def edit_curation(request, cid):
    """Handler function for curation editing.

    - Get curation being edited.
    - Create new form wizard with all initial data from old curation (except
      site matches data).
    - When new form is submitted, insert new curation instance to DB and
      remove the old one.

    Args:
        request: the incoming HTTP request; its session is updated.
        cid: curation_id of the curation to edit.

    Returns:
        The response produced by the curation wizard view.
    """
    # Get curation
    old_curation = models.Curation.objects.get(curation_id=cid)

    # Get initial data for new curation form, keyed by wizard step.
    initial = {
        '0': init_publication(old_curation),
        '1': init_genome_form(old_curation),
        '2': init_techniques_form(old_curation),
        '3': init_site_report_form(old_curation),
        '8': init_curation_review_form(old_curation),
    }

    # Tell form wizard that this is an edit to an existing curation
    session_utils.put(request.session, 'old_curation', old_curation)
    # Save which publication we are about to edit
    session_utils.put(request.session, 'publication',
                      old_curation.publication.publication_id)

    # When doing a new curation, the first form is publication step. To help
    # the curator, if the publication is previously curated, some fields are
    # pre-populated, such as TF-type, TF-function, TF-species, etc. (assuming
    # they are same for all curations that belong to one publication). We
    # shouldn't use this feature for edit_curation, as the fields are
    # populated with the curation data being edited.
    # (The module name was previously misspelled as ``sesion_utils``, which
    # raised NameError on every call.)
    session_utils.put(request.session, "previously_curated_paper", None)

    wiz = CurationWizard.as_view(
        [
            PublicationForm,
            GenomeForm,
            TechniquesForm,
            SiteEntryForm,
            SiteExactMatchForm,
            SiteSoftMatchForm,
            SiteAnnotationForm,
            GeneRegulationForm,
            CurationReviewForm,
        ],
        initial_dict=initial,
        condition_dict={'0': False})
    return wiz(request)
def site_soft_match_process(wiz, form):
    """Post-process the soft-match step.

    The form maps each site to the soft-search result chosen by the curator;
    a value of "None" means the site stays unmatched because no good
    candidate was found. In high-throughput mode every site is afterwards
    matched against the peak data. The site list is saved back to the
    session.
    """
    session = wiz.request.session
    sites = session_utils.get(session, "sites")

    for site_id, match_id in form.cleaned_data.items():
        # Look the site up by its key (first hit wins).
        candidates = [s for s in sites if s.key == site_id]
        chosen = candidates[0]
        if match_id == "None":
            # The curator left this site unmatched.
            continue
        chosen.set_soft_match(match_id)

    # For high-throughput submissions, try to match quantitative values in
    # the peak data to sites.
    if session_utils.get(session, 'high_throughput_curation'):
        peaks = session_utils.get(session, 'peaks')
        for site in sites:
            site.match_peak_data(peaks)

    # Persist the updated list of sites.
    session_utils.put(session, "sites", sites)
def site_soft_match_process(wiz, form):
    """Post-process the soft-match step.

    Each entry of the form's cleaned data pairs a site key with the selected
    soft-search match; "None" marks a site that remains unmatched. When the
    curation is high-throughput, peak data is matched to every site. The
    list of sites is then stored in the session again.
    """
    session = wiz.request.session
    sites = session_utils.get(session, "sites")

    for site_id, match_id in form.cleaned_data.items():
        # Resolve the site object for this form field (first hit wins).
        hits = [entry for entry in sites if entry.key == site_id]
        target = hits[0]
        if match_id != "None":
            # A match was selected for this site.
            target.set_soft_match(match_id)

    # For high-throughput submissions, try to match quantitative values in
    # the peak data to sites.
    is_high_throughput = session_utils.get(session, 'high_throughput_curation')
    if is_high_throughput:
        peaks = session_utils.get(session, 'peaks')
        for site in sites:
            site.match_peak_data(peaks)

    # Save the list of sites.
    session_utils.put(session, "sites", sites)
def edit_curation(request, cid):
    """Handler function for curation editing.

    - Get curation being edited.
    - Create new form wizard with all initial data from old curation (except
      site matches data).
    - When new form is submitted, insert new curation instance to DB and
      remove the old one.

    Args:
        request: the incoming HTTP request; its session is updated.
        cid: curation_id of the curation to edit.

    Returns:
        The response produced by the curation wizard view.
    """
    # Get curation
    old_curation = models.Curation.objects.get(curation_id=cid)

    # Get initial data for new curation form, keyed by wizard step.
    initial = {
        '0': init_publication(old_curation),
        '1': init_genome_form(old_curation),
        '2': init_techniques_form(old_curation),
        '3': init_site_report_form(old_curation),
        '8': init_curation_review_form(old_curation),
    }

    # Tell form wizard that this is an edit to an existing curation
    session_utils.put(request.session, 'old_curation', old_curation)
    # Save which publication we are about to edit
    session_utils.put(request.session, 'publication',
                      old_curation.publication.publication_id)

    # When doing a new curation, the first form is publication step. To help
    # the curator, if the publication is previously curated, some fields are
    # pre-populated, such as TF-type, TF-function, TF-species, etc. (assuming
    # they are same for all curations that belong to one publication). We
    # shouldn't use this feature for edit_curation, as the fields are
    # populated with the curation data being edited.
    # (Fixes the misspelled ``sesion_utils`` reference, which raised
    # NameError whenever this view was called.)
    session_utils.put(request.session, "previously_curated_paper", None)

    wiz = CurationWizard.as_view(
        [
            PublicationForm,
            GenomeForm,
            TechniquesForm,
            SiteEntryForm,
            SiteExactMatchForm,
            SiteSoftMatchForm,
            SiteAnnotationForm,
            GeneRegulationForm,
            CurationReviewForm,
        ],
        initial_dict=initial,
        condition_dict={'0': False})
    return wiz(request)
def site_entry_process(wiz, form):
    """Post-process the site entry step.

    Parses the entered sites, searches exact matches for them in the session
    genomes and, in high-throughput mode, does the same for the entered
    peaks (selecting the first exact match by default and attaching the
    chosen peak techniques). Sites, peaks, site type, the quantitative-data
    flag and the quantitative data format are stored in the session.
    """
    session = wiz.request.session
    cleaned = form.cleaned_data

    genomes = session_utils.get(session, "genomes")
    sites = site_entry.parse_input(cleaned["sites"].strip())

    # Look for exact matches of every site.
    for entry in sites:
        entry.search_exact_match(genomes)

    # The curation has quantitative data if any site carries a value.
    has_qdata = any(entry.qval for entry in sites)

    # In high-throughput mode, collect peak data so that it can be saved as
    # non-motif-associated data.
    if session_utils.get(session, 'high_throughput_curation'):
        peaks = site_entry.parse_input(cleaned['peaks'].strip())
        techniques = models.ExperimentalTechnique.objects.filter(
            pk__in=cleaned['peak_techniques'])
        for peak in peaks:
            peak.search_exact_match(genomes)
            # When there is any match, pick the first one by default.
            if peak.get_exact_matches():
                peak.set_exact_match(0)
            # Attach the selected techniques to the peak.
            peak.clear_techniques()
            for technique in techniques:
                peak.add_technique(technique)
        if any(peak.qval for peak in peaks):
            has_qdata = True
        session_utils.put(session, 'peaks', peaks)

    # Store the sites, the site type and the quantitative-data flag.
    session_utils.put(session, 'sites', sites)
    session_utils.put(session, 'site_type', cleaned['site_type'])
    session_utils.put(session, 'has_quantitative_data', has_qdata)

    # Store the quantitative data format, if one was given.
    qval = cleaned.get('quantitative_data_format', None)
    session_utils.put(session, 'quantitative_data_format', qval)
def site_entry_process(wiz, form):
    """Post-process the site entry step.

    The submitted sites are parsed and searched for exact matches in the
    genomes kept in the session. For high-throughput curations the peaks are
    parsed and searched too; each peak defaults to its first exact match and
    receives the selected peak techniques. The results and the related form
    values are written to the session.
    """
    session = wiz.request.session
    data = form.cleaned_data

    genomes = session_utils.get(session, "genomes")
    sites = site_entry.parse_input(data["sites"].strip())

    # Find exact matches.
    for parsed_site in sites:
        parsed_site.search_exact_match(genomes)

    # Any site with a quantitative value marks the whole curation.
    has_qdata = any(parsed_site.qval for parsed_site in sites)

    # High-throughput: get peak data to save it as non-motif-associated data.
    if session_utils.get(session, 'high_throughput_curation'):
        peaks = site_entry.parse_input(data['peaks'].strip())
        techniques = models.ExperimentalTechnique.objects.filter(
            pk__in=data['peak_techniques'])
        for peak in peaks:
            peak.search_exact_match(genomes)
            # Select the first match by default, if there is one.
            if peak.get_exact_matches():
                peak.set_exact_match(0)
            # Replace the peak's techniques with the selected ones.
            peak.clear_techniques()
            for tech in techniques:
                peak.add_technique(tech)
        if any(peak.qval for peak in peaks):
            has_qdata = True
        session_utils.put(session, 'peaks', peaks)

    # Save the list of sites.
    session_utils.put(session, 'sites', sites)
    # Save the type of the site list.
    session_utils.put(session, 'site_type', data['site_type'])
    # Save whether the curation has quantitative data.
    session_utils.put(session, 'has_quantitative_data', has_qdata)
    # Save the quantitative data format, if any.
    qval = data.get('quantitative_data_format', None)
    session_utils.put(session, 'quantitative_data_format', qval)
def curation(request):
    """Entry point for the (regular) curation wizard.

    Removes a leftover 'old_curation' session key, flags the session as not
    high-throughput and dispatches the request to the curation wizard.
    """
    session = request.session

    # If the user selected an old curation and then went back, the session
    # still has the old_curation key, which would cause trouble.
    if session_utils.has(session, 'old_curation'):
        session_utils.remove(session, 'old_curation')

    # This is not a high-throughput submission.
    session_utils.put(session, 'high_throughput_curation', False)

    wizard_steps = [
        PublicationForm,
        GenomeForm,
        TechniquesForm,
        SiteEntryForm,
        SiteExactMatchForm,
        SiteSoftMatchForm,
        SiteAnnotationForm,
        GeneRegulationForm,
        CurationReviewForm,
    ]
    step_conditions = {'5': inexact_match_form_condition}
    wizard_view = CurationWizard.as_view(wizard_steps,
                                         condition_dict=step_conditions)
    return wizard_view(request)
def genome_process(wiz, form):
    """Post-process the genome and TF selection step.

    Collects the selected genomes, stores them together with the site and TF
    species in the session, and updates the promoter/expression data flags
    of the publication being curated.
    """
    session = wiz.request.session
    cleaned = form.cleaned_data

    # Form validation searches the genome in the DB and inserts it when it is
    # missing, so the genome with this accession should exist at this point.
    primary_accession = cleaned['genome_accession']
    genomes = [models.Genome.objects.get(genome_accession=primary_accession)]

    # Extra genome accession numbers (if any).
    for i in xrange(1, settings.NUMBER_OF_GENOME_ACCESSION_FIELDS):
        extra_accession = cleaned.get('genome_accession_%d' % i, None)
        if not extra_accession:
            continue
        genomes.append(
            models.Genome.objects.get(genome_accession=extra_accession))

    # Store the genomes and both species in the session data.
    session_utils.put(session, 'genomes', genomes)
    session_utils.put(session, 'site_species', cleaned['site_species'])
    session_utils.put(session, 'TF_species', cleaned['TF_species'])

    # Manuscript-related fields are defined when the publication is added,
    # but the curator gets a chance to edit them during curation.
    pubid = session_utils.get(session, 'publication')
    publication = models.Publication.objects.get(publication_id=pubid)
    publication.contains_promoter_data = cleaned["contains_promoter_data"]
    publication.contains_expression_data = cleaned["contains_expression_data"]
    publication.save()
def site_exact_match_process(wiz, form):
    """Post-process the site exact match step.

    Sites for which the curator picked one of the possible exact matches are
    marked as matched. For the remaining sites a soft search, which allows
    some substitutions when searching the sequence in the genome, is
    performed. In high-throughput mode peak data is matched to every site.
    The site list is saved back to the session.
    """
    session = wiz.request.session
    genomes = session_utils.get(session, "genomes")
    sites = session_utils.get(session, "sites")

    for site_id, match_id in form.cleaned_data.items():
        # Look the site up by its key (first hit wins).
        candidates = [s for s in sites if s.key == site_id]
        chosen = candidates[0]
        if match_id == "None":
            # Not matched: fall back to a soft search.
            chosen.search_soft_match(genomes)
        else:
            # The curator selected an exact match for this site.
            chosen.set_exact_match(match_id)

    # For high-throughput submissions, try to match quantitative values in
    # the peak data to sites.
    if session_utils.get(session, 'high_throughput_curation'):
        peaks = session_utils.get(session, 'peaks')
        for site in sites:
            site.match_peak_data(peaks)

    # Persist the updated list of sites.
    session_utils.put(session, "sites", sites)
def site_exact_match_process(wiz, form):
    """Post-process the site exact match step.

    Every form entry pairs a site key with the exact match selected for it.
    Sites with a selection are marked as exactly matched; sites whose value
    is "None" get a soft search (a search tolerating some substitutions) in
    the session genomes. High-throughput curations additionally match peak
    data to all sites. The sites are then stored in the session again.
    """
    session = wiz.request.session
    genomes = session_utils.get(session, "genomes")
    sites = session_utils.get(session, "sites")

    for site_id, match_id in form.cleaned_data.items():
        # Resolve the site object for this form field (first hit wins).
        hits = [entry for entry in sites if entry.key == site_id]
        target = hits[0]
        if match_id != "None":
            # This site is matched.
            target.set_exact_match(match_id)
        else:
            # No exact match chosen: perform the soft search.
            target.search_soft_match(genomes)

    # For high-throughput submissions, try to match quantitative values in
    # the peak data to sites.
    is_high_throughput = session_utils.get(session, 'high_throughput_curation')
    if is_high_throughput:
        peaks = session_utils.get(session, 'peaks')
        for site in sites:
            site.match_peak_data(peaks)

    # Save the list of sites.
    session_utils.put(session, "sites", sites)
def curation(request):
    """Entry point for the (regular) curation wizard.

    Drops a stale 'old_curation' key from the session, records that this is
    not a high-throughput submission and hands the request to the curation
    wizard view.
    """
    session = request.session

    # A user who picked an old curation and then navigated back leaves the
    # old_curation key in the session, which would cause trouble here.
    if session_utils.has(session, 'old_curation'):
        session_utils.remove(session, 'old_curation')

    # Regular (not high-throughput) submission.
    session_utils.put(session, 'high_throughput_curation', False)

    form_list = [
        PublicationForm,
        GenomeForm,
        TechniquesForm,
        SiteEntryForm,
        SiteExactMatchForm,
        SiteSoftMatchForm,
        SiteAnnotationForm,
        GeneRegulationForm,
        CurationReviewForm,
    ]
    conditions = {'5': inexact_match_form_condition}
    handler = CurationWizard.as_view(form_list, condition_dict=conditions)
    return handler(request)
def techniques_process(wiz, form):
    """Post-process the experimental techniques step.

    Looks up the techniques selected in the form and stores them in the
    session, where the site-annotation step reads them.
    """
    selected_ids = form.cleaned_data['techniques']
    selected = models.ExperimentalTechnique.objects.filter(pk__in=selected_ids)
    # Keep the selected techniques for the site-annotation step.
    session_utils.put(wiz.request.session, 'techniques', selected)
def techniques_process(wiz, form):
    """Post-process the experimental techniques step.

    The techniques whose primary keys were selected in the form are fetched
    and saved in the session for later use in the site-annotation step.
    """
    technique_pks = form.cleaned_data['techniques']
    chosen_techniques = models.ExperimentalTechnique.objects.filter(
        pk__in=technique_pks)
    # Save the selection (to be used in the site-annotation step).
    session_utils.put(wiz.request.session, 'techniques', chosen_techniques)