async def post(self, request):
    """Handle a POST that registers a domain group and its domains.

    The JSON body must carry ``name`` and may carry a ``domains`` list.
    Both the group and each listed domain go through ``get_or_create``;
    the response reports their ids together with the ``created`` flag
    that ``get_or_create`` returned for each of them.
    """
    payload = await request.json()
    group, group_created = get_or_create(session, DomainGroup, name=payload['name'])

    domain_summaries = []
    # A missing, null or empty "domains" entry simply produces no summaries.
    for domain_name in payload.get('domains') or []:
        domain, domain_created = get_or_create(session, Domain, domain=domain_name)
        domain_summaries.append({
            'id': domain.id,
            'domain': domain_name,
            'created': domain_created,
        })

    encoded = self.resource.encode({
        'id': group.id,
        'name': group.name,
        'domains': domain_summaries,
        'created': group_created,
    })
    return Response(status=200, body=encoded, content_type='application/json')
def _parse_properties(self):
    """Turn every entry of ``self.data['dhead']`` into a Property id.

    Each entry holds the property text at index 0 and, when present, the
    phase of the component at index 1.  The property text is split into a
    name and a unit, the matching Property is fetched or created, and its
    id is collected.

    :return: list of Property ids, in the order of ``self.data['dhead']``
    """
    property_ids = []
    for header in self.data['dhead']:
        term = header[0]
        try:
            phase = header[1]
        except IndexError:
            # A fraction does not have a phase listed
            phase = None

        # A property is written either as "Name, unit" (e.g. "Temperature, K")
        # or as a bare name without unit (e.g. Refractive index).  The greedy
        # first group makes the split happen at the LAST "comma space".
        match = re.search('(.*),\s(.*)', term)
        if match is None:
            # No unit found: the whole string is the property name
            prop_name, unit = term, None
        else:
            prop_name = match.group(1)
            # Replace black small circle with middle dot as multiplication symbol
            unit = match.group(2).replace('•', u'\u00B7')

        prop, _created = get_or_create(self.session, Property,
                                       name=prop_name, unit=unit, phase=phase)
        # Commit so that a freshly created property has its ID assigned
        self.session.commit()
        # The ID is what the measurement rows refer to
        property_ids.append(prop.id)
    return property_ids
def _parse_components(self): components = [] component_ids = [] for i in xrange(len(self.data['components'])): component, created = get_or_create(self.session, Component, name=self.data['components'][i]['name']) if created: component.formula = self.data['components'][i]['formula'] # We need the component IDs, don't know what they are before adding to the db self.session.commit() # Store components in case we have to add a mixture later on components.append(component) component_ids.append(component.id) # See if components already share a mixture, should only ever be one mixture # Optimized query, SQLAlchemy was too slow. Query first gets list of mixtures that contains either component. # Then only the ones where mixture_id appears as much as there are components are kept. Finally an intersect is # done with the actual mixtures that have x components. This is necessary because a mixture with 3 components # can provide a false positive when only looking at 2 components, which won't be stripped out in the first query # IN ... takes CSV: (1, 2, 3) Converts components IDs to strings, then adds commas query = 'SELECT mixture_id FROM mixture_components ' \ 'WHERE component_id IN ('+", ".join(map(str, component_ids))+') ' \ 'GROUP BY mixture_id HAVING count(mixture_id) = :length ' \ 'INTERSECT ' \ 'SELECT mixture_id FROM mixture_components ' \ 'GROUP BY mixture_id HAVING count(mixture_id) = :length' shared_mixture = self.session.execute(text(query), params={'length': len(components)}).first() return components, shared_mixture
def test_raw_variable(self):
    """Store one ``db.Field`` built from the CanCM4 test dataset and verify
    the simple dictionary returned by ``as_dict`` and the linked clean units.
    """
    session = db.Session()
    try:
        dataset = CanCM4TestDataset()
        container = db.Container(session, dataset)
        units = get_or_create(session, db.CleanUnits, **dataset.clean_units[0])
        variable = get_or_create(session, db.CleanVariable, **dataset.clean_variable[0])

        field = db.Field(dataset, container, dataset.variables[0], units, variable)
        session.add(field)
        session.commit()

        # Exactly one Field row is expected to exist at this point.
        stored = session.query(db.Field).one()
        _objects, simple = stored.as_dict()

        expected = {
            'name': u'tas',
            'cid': 1,
            'cvid': 1,
            'long_name': u'Near-Surface Air Temperature',
            'standard_name': u'air_temperature',
            'fid': 1,
            'cuid': 1,
            'units': u'K',
            'type': u'variable',
            'description': None,
        }
        self.assertDictEqual(simple, expected)
        self.assertEqual(stored.clean_units.standard_name, 'K')
    finally:
        session.close()
def __init__(self, chat_id=None, *args, **kwargs):
    """
    Set up the state shared by every crawler.

    Starts with empty ``last_posts`` / ``new_posts`` lists and resolves the
    ``WebPage`` record for this crawler from ``self.web_type`` and
    ``self.crawler_url`` (both are read here, not set here). Extra
    positional and keyword arguments are accepted but not used.

    :param chat_id: optional parameter to specify where the crawler should be writing the results
    """
    self.new_posts = []
    self.last_posts = []
    self.chat_id = chat_id

    page = get_or_create(WebPage, web_type=self.web_type, url=self.crawler_url)
    self.web_page = page
async def post(self, request):
    """Handle a POST that registers a single domain.

    The JSON body must carry ``domain``.  The record comes from
    ``get_or_create``; the response reports its id, its domain value and
    the ``created`` flag that ``get_or_create`` returned.
    """
    payload = await request.json()
    domain, was_created = get_or_create(Domain, domain=payload['domain'])

    summary = {
        'id': domain.id,
        'domain': domain.domain,
        'created': was_created,
    }
    encoded = self.resource.encode(summary)
    return Response(status=200, body=encoded, content_type='application/json')
def edit_entry(view, id):
    """Show the edit form for an entry and save it when the form is posted.

    On a valid POST the entry's name and description are taken from the
    form (plus the author list when ``view`` is ``'books'``), the change is
    committed and the user is redirected to the entry listing.  Otherwise
    the edit template is rendered.

    NOTE(review): the entry is always loaded from ``Book`` and bound to
    ``BookForm`` regardless of ``view`` -- confirm this is intended for
    non-book views.
    """
    entry = db_session.query(Book).filter(Book.id == id).first()
    form = BookForm(request.form, obj=entry)

    # Anything but a valid POST just (re)displays the form.
    if not (request.method == 'POST' and form.validate()):
        flash('edited entry to %s database' % view)
        return render_template('add_entry.html', form=form, view=view,
                               entry=entry, mode="edit")

    if view == 'books':
        # Rebuild the author list from scratch out of the submitted names.
        entry.authors = []
        for author_name in form.authors.data:
            entry.authors.append(get_or_create(Author, name=author_name))
    entry.name = form.name.data
    entry.description = form.description.data
    db_session.commit()

    flash('%s successfuly have edited' % entry.name)
    listing_url = url_for('admin_show_entries', pagin=app.config['PAGIN'],
                          page=1, view=view)
    return redirect(listing_url)
def add_staff(self, id, workplace, name, sex): if self.session.query(db.Staff).get(id): print TermColors.WARNING, 'already!', TermColors.ENDC return dp = db.get_or_create(self.session, db.Department, name=workplace) # dp = db.Department(name=workplace) staff = db.Staff(id=id, name=name, gender=sex) staff.department = dp print TermColors.OKBLUE, 'inserted!', TermColors.ENDC self.session.add(staff) self.session.commit()
def add_entry(view):
    """Show the add form for ``view`` and store the entry when it is posted.

    ``view == 'books'`` uses ``BookForm`` and creates a ``Book`` with its
    authors; any other view uses ``AuthorForm`` and creates an ``Author``.
    A valid POST commits the new entry and redirects to the entry listing;
    otherwise the add template is rendered.
    """
    is_book = view == 'books'
    form = BookForm(request.form) if is_book else AuthorForm(request.form)

    # Anything but a valid POST just (re)displays the form.
    if not (request.method == 'POST' and form.validate()):
        flash('Add new entry to %s database' % view)
        return render_template('add_entry.html', form=form, view=view, mode='add')

    if is_book:
        entry = Book(name=form.name.data, description=form.description.data)
        for author_name in form.authors.data:
            entry.authors.append(get_or_create(Author, name=author_name))
    else:
        entry = Author(str(form.name.data))

    db_session.add(entry)
    db_session.commit()

    flash('%s successfuly added' % entry.name)
    listing_url = url_for('admin_show_entries', pagin=app.config['PAGIN'],
                          page=1, view=view)
    return redirect(listing_url)
def scrape_proposal_page(proposal_url, file_number):
    """
    Navigates to the page giving details about a piece of legislation,
    scrapes that data, and adds a model to the database session.

    :param proposal_url: path of the proposal page, appended to BASE_SITE
    :param file_number: file number used to label the fetch and, if the
        page cannot be parsed, the warning that is logged
    :return: the new, committed DB model, or None when any of the expected
        fields could not be extracted from the page
    """
    fetcher = LegistarNavigator()
    soup = fetcher.fetch(
        '%s/%s' % (BASE_SITE, proposal_url), 'file-%s' % (file_number))

    def field_text(element_id):
        # Text content of the page element with the given id.
        return extract_text(soup.find(id=element_id))

    try:
        file_number = int(field_text('ctl00_ContentPlaceHolder1_lblFile2'))
        proposal_title = field_text('ctl00_ContentPlaceHolder1_lblTitle2')
        # NOTE(review): this reads the same element as introduction_date
        # below ("lblIntroduced2"); it looks like a copy-paste slip and
        # probably should point at the page's "type" label -- confirm.
        proposal_type = field_text('ctl00_ContentPlaceHolder1_lblIntroduced2')
        proposal_status = field_text('ctl00_ContentPlaceHolder1_lblStatus2')
        introduction_date = parse_date(
            field_text('ctl00_ContentPlaceHolder1_lblIntroduced2'))
    except Exception:
        # Best effort: a page that does not parse is skipped, but unlike a
        # bare "except:" this lets KeyboardInterrupt/SystemExit through and
        # records why the scrape failed.
        logging.warning('Unable to scrape proposal %s', file_number,
                        exc_info=True)
        return None

    db_proposal = db.Proposal(file_number, proposal_title)
    db_proposal.status = proposal_status
    db_proposal.proposal_type = proposal_type
    db_proposal.introduction_date = introduction_date

    # Attach each distinct noun phrase of the title exactly once.
    blob = textblob.TextBlob(proposal_title)
    for blob_phrase in set(map(unicode, blob.noun_phrases)):
        db_proposal.noun_phrases.append(
            db.get_or_create(db.session, db.NounPhrase, phrase=blob_phrase))

    db.session.add(db_proposal)
    db.session.commit()
    return db_proposal
def test():
    """Exercise ``db.get_or_create`` against a freshly rebuilt SQLite file.

    Creates pipelines, pipeline runs, task runs and steps, and asserts on
    the ids that come back when the same or slightly different values are
    passed again.
    """
    engine = sqla.create_engine('sqlite:////tmp/test.db')
    db.Base.metadata.drop_all(engine)
    db.Base.metadata.create_all(engine)
    session = sessionmaker(bind=engine)()

    now = datetime.datetime.now
    run_name = 'pipeline-randomwords-run-vxw5w'
    pipeline_name = 'pipeline-randomwords'
    task_name = 'step-bacon'
    namespace = 'play'

    def make_pipeline(name):
        return db.get_or_create(session, db.Pipelines,
                                name=name, namespace=namespace)

    def make_run(name, pipeline):
        return db.get_or_create(session, db.Pipelineruns,
                                name=name,
                                namespace=namespace,
                                start_time=now(),
                                completion_time=now(),
                                status=0,
                                pipeline_id=pipeline.id,
                                json='{}')

    def make_step():
        return db.get_or_create(session, db.Steps,
                                name="step1",
                                namespace=namespace,
                                taskrun_id=first_taskrun.id,
                                log="FOO BAR")

    first_pipeline = make_pipeline(pipeline_name)
    second_pipeline = make_pipeline("pipeline2")
    third_pipeline = make_pipeline("pipeline3")

    pipelinerun = make_run(run_name, first_pipeline)
    make_run("pr2", second_pipeline)
    make_run("pipeline3", third_pipeline)
    make_run("pipeline4", first_pipeline)

    # last one refer to first id created
    joined = session.query(db.Pipelineruns, db.Pipelines).join(db.Pipelines).all()
    assert joined[-1][1].id == 1

    first_taskrun = db.get_or_create(
        session, db.Taskruns,
        name=task_name,
        namespace=namespace,
        start_time=now(),
        completion_time=now(),
        pod_name="pod1",
        status=0,
        json='{}',
        pipelinerun_id=pipelinerun.id,
    )

    later_start = now()
    later_completion = now()
    second_taskrun = db.get_or_create(
        session, db.Taskruns,
        name=task_name,
        namespace=namespace,
        start_time=later_start,
        completion_time=later_completion,
        pod_name="pod1",
        status=0,
        json='{}',
        pipelinerun_id=pipelinerun.id,
    )
    assert first_taskrun.id != second_taskrun.id

    # Same start time as the second task run, but another completion time
    # and pod name: the same row is expected back.
    third_taskrun = db.get_or_create(
        session, db.Taskruns,
        name=task_name,
        namespace=namespace,
        start_time=later_start,
        completion_time=now(),
        pod_name="pod2",
        status=0,
        json='{}',
        pipelinerun_id=pipelinerun.id,
    )
    assert third_taskrun.id == second_taskrun.id

    second_taskrun = db.get_or_create(
        session, db.Taskruns,
        name=task_name,
        namespace=namespace,
        start_time=later_start,
        completion_time=later_completion,
        pod_name="pod1",
        status=0,
        json='{}',
        pipelinerun_id=pipelinerun.id,
    )

    step = make_step()
    step_again = make_step()
    assert step.id == step_again.id

    found = session.query(db.Pipelineruns).filter(
        db.Pipelineruns.name == 'pipeline4').first()
    assert found.name == 'pipeline4'
#! /usr/bin/python import db import textblob if __name__ == '__main__': counter = 10 for instance in db.session.query(db.Proposal): print instance.title blob = textblob.TextBlob(instance.title) for np in map(unicode, blob.noun_phrases): phrase = db.get_or_create(db.session, db.NounPhrase, phrase=np) instance.noun_phrases.append(phrase) db.session.commit() counter -= 1 if not counter: break for np in db.session.query(db.NounPhrase): print '%s: %s' % (np.phrase, ','.join([str(p.file_number) for p in np.proposals]))
def _parse_ref(self):
    """Fetch or create the Ref identified by ``self.data['ref']['full']``.

    The title from ``self.data['ref']['title']`` is only written when the
    Ref was newly created.

    :return: the Ref object
    """
    ref_data = self.data['ref']
    ref, is_new = get_or_create(self.session, Ref, full=ref_data['full'])
    if is_new:
        ref.title = ref_data['title']
    return ref
continue page = aao.dump_page(kd) coll = sess.query(db.College).filter(db.College.name == name).first() if not coll: print TermColors.WARNING, "not in college db, ignore!", TermColors.ENDC continue else: print TermColors.OKGREEN, "found!", TermColors.ENDC for km, mname in aao.extract_major(page): mname = mname.replace(u"(", "(").replace(u")", ")") print ' ', km, mname major = db.get_or_create(sess, db.Major, no=km, name=mname, college=coll) year = 2014 # or other cls_page = aao.dump_page(kd, km, str(year)) all_classes = aao.extract_class(cls_page) if len(all_classes) == 0: sess.rollback() print TermColors.WARNING, "empty major, rollback!", TermColors.ENDC continue for kc, cname in all_classes: cname = cname.replace(u"(", "(").replace(u")", ")") cname = re.findall('(.*?)\[(.*?)\]', cname)[0][1] print ' ', kc, cname clss = db.get_or_create(sess, db.Class,
def _status_name_from_conditions(status_block):
    """Map the reason of the first condition in ``status_block`` to a status
    name: FAILURE when the reason starts with "fail" (case-insensitive),
    SUCCESS otherwise."""
    reason = status_block['conditions'][0]['reason']
    if reason.lower().startswith("fail"):
        return common.statusName("FAILURE")
    return common.statusName("SUCCESS")


def store_pipelinerun(kwargs):
    """Persist a pipeline run together with its task runs and step logs.

    :param kwargs: mapping describing the pipeline run; this function reads
        ``name``, ``namespace``, ``status`` (``startTime``, optional
        ``completionTime``, ``conditions``) and ``body`` (``spec`` and
        ``status.taskRuns``) from it

    For every task run the log of each step container is read from the pod
    through the Kubernetes core API and stored as a step row.
    """
    session = Session()
    run_status = kwargs['status']
    pipeline_name = kwargs['body']['spec']['pipelineRef']['name']
    pipelinerun_name = kwargs['name']
    namespace = kwargs['namespace']
    start_time = run_status['startTime']
    # The status keys are camelCase.  The previous check looked for
    # 'completion_time', which never matched, so the completion time of the
    # pipeline run was always stored as empty.
    completion_time = run_status.get('completionTime')

    # JSON snapshot of the status, enriched with the identifying names.
    snapshot = dict(run_status)
    snapshot['namespace'] = namespace
    snapshot['pipelineName'] = pipeline_name
    snapshot['pipelinerunName'] = pipelinerun_name

    pipeline = db.get_or_create(session,
                                db.Pipelines,
                                name=pipeline_name,
                                namespace=namespace)
    pipelinerun = db.get_or_create(
        session,
        db.Pipelineruns,
        name=pipelinerun_name,
        namespace=namespace,
        start_time=dtparse.parse(start_time),
        completion_time=dtparse.parse(completion_time) if completion_time else None,
        status=_status_name_from_conditions(run_status),
        pipeline_id=pipeline.id,
        json=json.dumps(snapshot),
    )

    apiv1 = kubernetes.client.CoreV1Api()
    for trinfo in kwargs['body']['status']['taskRuns'].values():
        tr_status = trinfo['status']
        podname = tr_status['podName']
        # A task run that is still running has no completion time yet.
        if 'completionTime' in tr_status:
            tr_completion = dtparse.parse(tr_status['completionTime'])
        else:
            tr_completion = None

        taskrun = db.get_or_create(session,
                                   db.Taskruns,
                                   name=trinfo['pipelineTaskName'],
                                   namespace=namespace,
                                   start_time=dtparse.parse(tr_status['startTime']),
                                   completion_time=tr_completion,
                                   pod_name=podname,
                                   status=_status_name_from_conditions(tr_status),
                                   json=json.dumps(tr_status),
                                   pipelinerun_id=pipelinerun.id)

        for container in tr_status['steps']:
            cntlog = apiv1.read_namespaced_pod_log(
                pretty=True,
                container=container['container'],
                name=podname,
                namespace=namespace)
            db.get_or_create(
                session,
                db.Steps,
                name=container['name'],
                namespace=namespace,
                taskrun_id=taskrun.id,
                log=cntlog,
            )
def _parse_listing(self, mixture, ref):
    """Fetch or create the Listing for ``self.url``, ``ref`` and ``mixture``
    and append it to ``mixture.listings``.

    :param mixture: mixture the listing belongs to
    :param ref: reference the listing belongs to
    :return: tuple ``(listing, created_flag)`` as obtained from ``get_or_create``
    """
    listing, was_created = get_or_create(
        self.session, Listing, url=self.url, ref=ref, mixture=mixture)
    mixture.listings.append(listing)
    return (listing, was_created)
def get_or_create(self, model, **kwargs):
    """Call the module-level ``get_or_create`` with this object's session.

    The model is passed first and ``self.sess`` second; all keyword
    arguments are forwarded unchanged and the helper's result is returned
    as is.
    """
    session = self.sess
    return get_or_create(model, session, **kwargs)