def extract_concepts(self):
    '''
    Extract ontology concepts from every document in the loaded collection
    and persist them to the `collection_go` table.

    Concepts are extracted from each document. For the extracted concepts,
    predecessors are extracted. We need to keep an 'Already Extracted' list of
    GO concepts so that we avoid extracting them again OR we use a try/except
    when we are inserting the concept to avoid duplicates! We will pay the
    cost of duplicate extraction though!
    '''
    db = DB()
    Collection._load()  # loads Collection._documents (PMID + abstract)
    # Only GO is processed for now; the other terminologies are disabled.
    terminology_list = ["go"]  # ,"mesh","icd10","snomed"
    extracted_doc = 0  # progress counter for console output
    for terminology in terminology_list:
        # MaxMatcher cache shared across documents for one terminology —
        # presumably memoizes matcher state; verify in _extract_concepts.
        MaxMatcher = dict()
        for doc in Collection._documents:
            extracted_doc += 1
            print "extracted_doc: ", extracted_doc, ' id:', doc.PMID
            document = doc.abstract  # document is the abstract text
            concepts = self._extract_concepts(document, terminology, MaxMatcher)
            if len(concepts) > 0:
                concept_id_list = ','.join(concepts)
                if terminology == 'go':
                    self.AddGeneOntologyConceptPredecessors(doc.PMID, concepts)
                    # NOTE(review): SQL built by string concatenation —
                    # injection-prone; prefer a parameterized query (PEP 249).
                    query = "Insert into collection_go(PMID,go_id_list) values ('" + doc.PMID + "',' " + concept_id_list + "');"
                    try:
                        print query
                        db.Query(query)
                    except:
                        # Bare except deliberately swallows duplicate-insert
                        # errors (see docstring); it also hides real failures.
                        print ""  # "Unexpected error:", sys.exc_info()[0]
def quotes():
    """List the current user's quotes and handle quote creation.

    On a valid POST, creates the quote plus one QuoteCollection link per
    selected collection inside a single transaction, then redirects.
    Otherwise renders the list page.
    """
    form = QuoteAddForm()
    # Choices are restricted to the current user's own collections.
    form.collections.choices = [(collection.name, collection.name)
                                for collection in Collection.select().where(
                                    Collection.user == current_user.get_id())]
    if form.validate_on_submit():
        # All inserts happen atomically so a failed link rolls back the quote.
        with db.atomic() as txn:
            quote = Quote.create(
                content=form.content.data,
                author=form.author.data,
                user=current_user.get_id(),
            )
            for collection_name in form.collections.data:
                # Look up by (name, user) so one user cannot attach quotes
                # to another user's same-named collection.
                collection = Collection.get(
                    Collection.name == collection_name,
                    Collection.user == current_user.get_id(),
                )
                QuoteCollection.create(
                    quote=quote,
                    collection=collection,
                )
        return redirect(url_for('quotes'))
    else:
        quotes = Quote.select().where(Quote.user == current_user.get_id())
        return render_template('quotes.html', form=form, quotes=quotes)
def data_onboard(data):
    """Bulk-load inspection rows into the database.

    `data` is an iterable of dict-like rows. Boroughs, violation codes and
    restaurants are de-duplicated via the project's `Collection` helper,
    saved first, and then the string foreign keys on restaurants and
    inspections are rewritten to the generated integer ids before the final
    bulk save.
    """
    s = new_session()
    # Safe dict access with a default (equivalent to dict.get).
    getOrElse = lambda o, k, d: o[k] if k in o else d
    # De-duplicating accumulators keyed by the given extractor.
    violation_code = Collection(ViolationCode, lambda vc: vc.code)
    borough = Collection(Borough, lambda b: b.borough)
    restaurant = Collection(Restaurant, lambda r: r.camis)
    inspection = []
    for row in data:
        if 'boro' in row:
            borough.insert(Borough(borough=row['boro']))
        if 'violation_code' in row:
            violation_code.insert(ViolationCode(code=row['violation_code']))
        if 'camis' in row:
            restaurant.insert(Restaurant(
                camis=getOrElse(row, 'camis', None),
                dba=getOrElse(row, 'dba', None),
                street=getOrElse(row, 'street', None),
                phone=getOrElse(row, 'phone', None),
                cuisine_description=getOrElse(row, 'cuisine_description', None),
                borough=getOrElse(row, 'boro', None),
                building=getOrElse(row, 'building', None),
                zipcode=getOrElse(row, 'zipcode', None)))
        # Every row yields an inspection, even when 'camis' is missing.
        inspection.append(Inspection(
            restaurant=getOrElse(row, 'camis', None),
            record_date=getOrElse(row, 'record_date', None),
            violation_code=getOrElse(row, 'violation_code', None),
            violation_description=getOrElse(row, 'violation_description', None),
            score=int(row['score']) if 'score' in row else None,
            inspection_date=getOrElse(row, 'inspection_date', None),
            inspection_type=getOrElse(row, 'inspection_type', None),
            critical_flag=getOrElse(row, 'critical_flag', None)))
    # Parents must be committed first so their ids exist for remapping below.
    s.bulk_save_objects(borough.getall())
    s.bulk_save_objects(violation_code.getall())
    s.commit()
    # NOTE(review): this set is immediately overwritten by the dict below —
    # dead code, kept as-is.
    boroughs = set([r.borough for r in restaurant.getall()])
    boroughs = {b[1]: b[0] for b in s.query(Borough.__table__.c.id,
                                           Borough.__table__.c.borough)}
    # Rewrite borough names to their integer primary keys.
    for r in restaurant.getall():
        if r.borough != None:
            r.borough = boroughs[r.borough]
    bulk_save(s, restaurant.getall(), 1500)
    # NOTE(review): same dead-set pattern for codes and camis below.
    codes = set([i.violation_code for i in inspection])
    codes = {c[1]: c[0] for c in s.query(ViolationCode.__table__.c.id,
                                         ViolationCode.__table__.c.code)}
    camis = set([r.camis for r in restaurant.getall()])
    camis = {c[1]: c[0] for c in s.query(Restaurant.__table__.c.id,
                                         Restaurant.__table__.c.camis)}
    # Rewrite inspection foreign keys to integer ids.
    for i in inspection:
        if i.restaurant != None:
            i.restaurant = camis[i.restaurant]
        if i.violation_code != None:
            i.violation_code = codes[i.violation_code]
    bulk_save(s, inspection, 1500)
    s.close()
def collection():
    """Create or rename a collection based on the JSON op_type."""
    payload = request.json
    owner = payload['user_id']
    operation = payload['op_type']
    name = payload['collection_name']
    if operation == 'new':
        Collection.create(name=name, user_id=owner)
    elif operation == 'edit':
        target = Collection.get(payload['collection_id'])
        target.update({'name': name})
    return ''
def signup():
    """Handle user registration.

    On a valid POST: rejects taken usernames, otherwise creates the user,
    creates their first collection (defaulting to 'My collection'), selects
    it, and logs them in. On validation errors, flashes each error. Always
    falls through to the signup template unless login succeeds.
    """
    form = SignupForm()
    if request.method == "POST" and form.validate():
        username = form.username.data
        password = form.password.data
        email = form.email.data
        if User.find_by_username(username):
            flash('Sorry, but this username is already taken.', 'warning')
        else:
            # User.new presumably returns the new user's id — confirm.
            user = User.get(User.new(username, password, email))
            collection_id = Collection.create(
                name=form.collection_name.data or 'My collection',
                user_id=user.get_id())
            # Make the freshly created collection the active one.
            user.update({'selected_collection': collection_id})
            remember = form.remember_me.data
            if login_user(user, remember=remember):
                flash("Logged in!", 'success')
                return redirect(url_for("index"))
            else:
                flash("Sorry, but you could not log in.", 'danger')
    else:
        # Surface every form validation error to the user.
        for field_name, error_messages in form.errors.items():
            for err in error_messages:
                flash(
                    'error with {}: {}'.format(field_name.replace('_', ' '), err),
                    'danger')
    return render_template("signup.html", form=form)
def insert_collection(form):
    """Create a Collection row from the form and its on-disk directories.

    Flushes the session first so the collection's primary key exists (the
    directory paths are derived from it), creates the record/token/video/wav
    directories, and refuses to proceed if any of them already exists —
    that would mean two collections map to the same storage location.

    Returns the new Collection; raises ValueError on a directory collision.
    """
    collection = Collection(form.name.data, form.sort_by.data,
                            assigned_user_id=form.assigned_user_id.data,
                            configuration_id=form.configuration_id.data)
    db.session.add(collection)
    db.session.flush()  # assigns the primary key used by get_*_dir()
    dirs = [
        collection.get_record_dir(),
        collection.get_token_dir(),
        collection.get_video_dir(),
        collection.get_wav_audio_dir()
    ]
    # create dirs for tokens and records
    # (renamed loop variable: `dir` shadowed the builtin)
    for directory in dirs:
        if not os.path.exists(directory):
            os.makedirs(directory)
        else:
            raise ValueError("""
                For some reason, we are about to create a collection with the
                same primary key as a previous collection. This could happen
                for example if 2 databases are used on the same machine. If
                this error occurs, the current environment has to change the
                DATA_BASE_DIR, TOKEN_DIR, RECORD_DIR flask environment
                variables to point to some other place.

                Folder that already exists: {}

                Perhaps some of the source folders have never been created
                before.
                """.format(directory))
    db.session.commit()
    return collection
def new():
    """Persist a POSTed collection JSON blob and return its hashid."""
    if request.method != 'POST':
        # OPTIONS pre-flight: backbone.js requires this before the POST.
        resp = Response(status=200)
        resp.headers['Allow'] = 'POST'
        return resp
    # Load the JSON that's POSTed to the API.
    payload = json.loads(request.data.decode("utf-8"))
    record = Collection(collection_json=payload)
    db.session.add(record)
    # flush() populates the autogenerated record.id
    # (http://stackoverflow.com/questions/1316952).
    db.session.flush()
    # Expose a pretty hashid instead of the raw integer id,
    # e.g. db collection.id 50 -> Jxbo2jag.
    pretty_id = hashids.encode(record.id)
    db.session.commit()
    return jsonify({'collection_id': pretty_id})
def add_collection():
    """Create a movie collection for the current user and redirect back."""
    title = request.form.get('name')
    if not title:
        title = 'Без названия'
    record = Collection(name=title, user_id=current_user.id)
    db.session.add(record)
    db.session.commit()
    flash('Подборка успешно добавлена', 'success')
    return redirect(url_for('movies.collections', user_id=current_user.id))
def get_list():
    """Fetch every collection object from the API list endpoint."""
    endpoint = URL(Collection.list_url())
    response = requests.get(endpoint)
    assert response.status_code == 200, response.status_code
    body = response.json()
    return body["objects"]
def chrome_extension_redirect():
    """Append a URL to a collection (existing or brand new) and redirect.

    Called by the browser extension with ?id=<collection>&url=<page>.
    Without an id, a new collection owned by g.user is created and the
    function returns early; with an id, the URL is appended only when the
    requester owns the collection, otherwise 404.
    """
    collection_id = request.args.get("id")
    url = request.args.get("url")
    collection = None
    if collection_id:
        collection = Collection.query.filter(
            Collection.id == collection_id).first()
    else:
        # No target collection: create one on the fly for the current user
        # and return immediately — the ownership check below is skipped.
        collection = Collection()
        user = g.user
        collection.creator = user
        collection.title = request.args.get('title')
        collection.append_child(
            CollectionItem(parent=collection, content=str(url)))
        db.session.add(collection)
        db.session.commit()
        return redirect(url)
    if collection and url and g.user == collection.creator:
        collection.append_child(
            CollectionItem(parent=collection, content=str(url)))
        cache_request(url)
        db.session.add(collection)
        db.session.commit()
        return redirect(url)
    else:
        # this should be improved
        return abort(404)
def get_item(id):
    """Fetch one collection's details from the API by id."""
    endpoint = URL(Collection.details_url(id))
    print(endpoint)
    response = requests.get(endpoint)
    assert response.status_code == 200, response.status_code
    return response.json()
def _get_collection_id(user_id, collection_name):
    """Return the id of the user's collection with that name, or None."""
    if not collection_name:
        return None
    match = Collection.find_first({
        'user_id': user_id,
        'name': collection_name
    })
    return match.get_id() or None
def collection(collection_name):
    """Show/edit/delete one of the current user's collections.

    Loads the collection (joined to its quotes) by name + owner; handles
    rename (with duplicate-name detection) and recursive delete via the
    edit form. Redirects back to the collections list after any mutation.
    """
    try:
        collection = (Collection.select(Collection, Quote).join(
            QuoteCollection, JOIN.LEFT_OUTER).join(Quote, JOIN.LEFT_OUTER).where(
                Collection.name == collection_name,
                Collection.user == current_user.get_id(),
            ).get())
    except Collection.DoesNotExist:
        flash('Collection not found')
        return redirect(url_for('collections'))
    form = CollectionEditForm(obj=collection)
    if form.validate_on_submit():
        if form.form_delete.data:
            # recursive=True also removes the QuoteCollection links.
            collection.delete_instance(recursive=True)
            flash('Collection deleted.')
            return redirect(url_for('collections'))
        collection.name = form.name.data
        try:
            collection.save()
        except IntegrityError:
            # Unique constraint on collection name for this user.
            flash('A collection with that name already exists.')
            return redirect(url_for('collections'))
        flash('Collection updated.')
        return redirect(url_for('collections'))
    else:
        return render_template(
            'collection.html',
            form=form,
            collection=collection,
        )
def add_new_collection():
    """Create a private collection owned by the user in the bearer token."""
    auth_header = request.headers.get('Authorization')
    if not auth_header:
        abort(401)
    # Token is the last whitespace-separated part ("Bearer <token>").
    access_token = auth_header.split(' ').pop()
    print('Access Token', access_token)
    collection_name = request.get_json()["name"]
    print(collection_name)
    # NOTE: claims are read without signature verification here.
    decoded_access_token = jwt.get_unverified_claims(access_token)
    print('decoded_access_token', decoded_access_token)
    user_id = decoded_access_token["sub"]
    collection_id = uuid.uuid4().hex
    new_collection = Collection(id=collection_id,
                                name=collection_name,
                                owner=user_id,
                                is_public=False)
    print(new_collection)
    new_collection.insert()
    return jsonify({
        "code": 200,
        "data": {
            "id": collection_id,
            "name": collection_name
        }
    })
def get_collections(self):
    """Load (and cache) all collections described under src/collections.

    Each JSON file names a collection title and a list of album directories;
    every readable track file in an album directory becomes a Track.
    Returns the cached list, or False when the library isn't goosed yet.
    """
    if not self.library_goosed:
        print("The library is not goosed. Call goose_up_library().")
        return False
    if not self.collections:
        collections_directory = "{0}/src/collections".format(
            self.base_location)
        collection_files = os.scandir(collections_directory)
        self.collections = []
        for collection_file in collection_files:
            with open(collection_file.path) as cfp:
                collection_json = json.load(cfp)
                collection = Collection(title=collection_json["title"])
                for album_path in collection_json["albums"]:
                    album = Album()
                    album_files = os.scandir(album_path)
                    for album_file in album_files:
                        track = Track.init_from_file(album_file.path)
                        if track:
                            album.tracks.append(track)
                            # Album metadata is taken from whichever track
                            # was read last — assumes tags are consistent.
                            album.title = track.album_title
                            album.artist_name = track.artist_name
                    collection.albums.append(album)
                self.collections.append(collection)
    return self.collections
def get(self, publisher_name, collection_name):
    """Write one publisher's collection as a JSON response."""
    publisher = Publisher.get_by_urlname(publisher_name)
    collection = Collection.get_by_urlname(collection_name, publisher.key)
    payload = {
        'publisher': simplejson.loads(publisher.json),
        'collection': simplejson.loads(collection.json),
    }
    self.response.headers["Content-Type"] = "application/json"
    self.response.out.write(simplejson.dumps(payload))
def add_collections_to_sender_form(sender, initial):
    """Inject a multi-select 'collections' field into the sender form.

    Choices are all named collections; the initial selection is every
    collection already containing the form's instance.
    """
    form = sender
    # Only collections with a non-empty name become selectable choices.
    collection_choices = tuple([
        (
            str(collection['pk']),
            collection['name']
        ) for collection in Collection.query().values('pk', 'name', )
        if collection['name']
    ])
    form.fields['collections'] = london.forms.ChoiceField(
        name='collections',
        widget=london.forms.SelectMultiple,
        choices=collection_choices,
        required=False)
    # FIXME: initial should already have collections
    initial['collections'] = [str(collection['pk']) for collection in
                              Collection.query().filter(
                                  items__contains=form.instance.pk)]
def get(self, publisher_name):
    """Write a publisher and all of its collections as a JSON response."""
    publisher = Publisher.get_by_urlname(publisher_name)
    collections = Collection.all_by_publisher(publisher.key)
    payload = {
        'publisher': simplejson.loads(publisher.json),
        'collections': [simplejson.loads(x.json) for x in collections],
    }
    self.response.headers["Content-Type"] = "application/json"
    self.response.out.write(simplejson.dumps(payload))
async def customers_list(limit: Optional[int] = Query(10, title="Page size"),
                         page: Optional[int] = Query(1, title="Display page")):
    """Return one page of customers from the Firestore 'customers' collection."""
    skip = (page - 1) * limit
    query = db.collection("customers").limit(limit).offset(skip)
    customers = [Customer.from_firebase(doc) for doc in query.stream()]
    return Collection(
        items=customers,
        meta=CollectionMeta(limit=limit, page=page))
def quote(quote_id):
    """Show/edit/delete a single quote owned by the current user.

    Pre-fills the form with the quote and its current collection names;
    on submit handles delete, content/author update, and re-links the
    quote's collections when the selection changed.
    """
    try:
        quote = Quote.get(
            Quote.id == quote_id,
            Quote.user == current_user.get_id(),
        )
    except Quote.DoesNotExist:
        flash('Quote not found')
        return redirect(url_for('quotes'))
    # Names of the collections this quote currently belongs to.
    quote_collections = [
        collection.name
        for collection in Collection.select().join(QuoteCollection).where(
            QuoteCollection.quote == quote,
        )
    ]
    quote.collections = quote_collections  # lets the form pre-select them
    form = QuoteEditForm(obj=quote)
    form.collections.choices = [(collection.name, collection.name)
                                for collection in Collection.select().where(
                                    Collection.user == current_user.get_id())]
    if form.validate_on_submit():
        # Defense against a tampered hidden id field.
        if form.id.data != quote_id:
            flash('Quote ID mismatch!')
            return redirect(url_for('quotes'))
        if form.form_delete.data:
            quote.delete_instance(recursive=True)
            flash('Quote deleted.')
            return redirect(url_for('quotes'))
        quote.content = form.content.data
        quote.author = form.author.data
        quote.save()
        if set(quote_collections) != set(form.collections.data):
            flash('Collections updated.')
            # Rebuild the link table from scratch for this quote.
            QuoteCollection.delete().where(
                QuoteCollection.quote == quote,
            ).execute()
            with db.atomic() as txn:
                for collection_name in form.collections.data:
                    QuoteCollection.create(
                        quote=quote,
                        collection=Collection.get(
                            Collection.name == collection_name,
                        ),
                    )
        flash('Quote updated.')
        return redirect(url_for('quotes'))
    else:
        return render_template('quote.html', form=form)
def create_collection(token, collection_name):
    """Create a collection in the token-holder's account; 401 if invalid."""
    result = validateToken(token)
    if not result[0]:
        return {"error": "UnauthorizedError"}, 401
    user = result[1]
    fresh = Collection(collection_name=collection_name)
    user.update(add_to_set__my_collections=[fresh])
    return {"message": "Collection Created Successfully"}, 200
def add_recipe(request):
    """Adds recipe to user's collection (no-op if already saved)."""
    profile = UserProfile.objects.get(user=request.user)
    recipe_id = request.GET.get('recipe_id')
    recipe = Recipe.objects.get(id=recipe_id)
    already_saved = profile.recipes.filter(id=recipe_id).first()
    if already_saved:
        return
    Collection(user_profile=profile, recipe=recipe).save()
def retrieve_collection(self, collection_route, collection_size=None):
    """Scrape a collection page: title from the details block, items via get_items."""
    html_doc = get_html(self.core_route + collection_route)
    soup = BeautifulSoup(html_doc, 'html.parser')
    details = soup.find("div", class_="item-details-inner")
    title = details.find("div", class_="detailField").h1.text
    items = self.get_items(collection_route, collection_size)
    return Collection(title, items)
def retrieve_csv_collection(self, collection_with_links):
    """Build a Collection by fetching every item behind the given links."""
    title = collection_with_links['title']
    items = [self.get_item(link) for link in collection_with_links['links']]
    return Collection(title, items)
def get(self, publisher_name, collection_name):
    """Write a publisher, a collection and all of its records as JSON."""
    publisher = Publisher.get_by_urlname(publisher_name)
    collection = Collection.get_by_urlname(collection_name, publisher.key)
    logging.info(str(collection))
    records = Record.all_by_collection(collection.key)
    payload = {
        'publisher': simplejson.loads(publisher.json),
        'collection': simplejson.loads(collection.json),
        'records': [simplejson.loads(x.record) for x in records],
    }
    self.response.headers["Content-Type"] = "application/json"
    self.response.out.write(simplejson.dumps(payload))
def collection_json(collection_name):
    """Return the quotes of a named collection as JSON (404 if missing)."""
    try:
        collection = Collection.get(Collection.name == collection_name)
    except Collection.DoesNotExist:
        return jsonify({'message': 'Collection not found.'}), 404, cors_header
    quote_rows = (Quote
                  .select(Quote.content, Quote.author, Quote.id)
                  .join(QuoteCollection)
                  .where(QuoteCollection.collection == collection, ))
    return jsonify({'quotes': list(quote_rows.dicts())}), cors_header
def _get_collection_items(name):
    """Resolve a collection's item pks to model instances.

    Tries each model path in APPS_FOR_COLLECTION_APP in turn; stops early
    once one model accounts for every pk (items of a collection are
    expected to share a model). Returns [] on any failure (best-effort).
    """
    result = []
    try:
        for path in APPS_FOR_COLLECTION_APP:
            pk_items = Collection.query().get(name=name)['items']
            items = get_model(path).query().filter(pk__in=pk_items)
            result.extend(items)
            if items.count() == len(pk_items):
                # all items are of one model: no need to search items
                # in other models
                break
    except Exception:
        # Was a bare `except:` — narrowed so SystemExit/KeyboardInterrupt
        # are no longer swallowed; lookup remains deliberately best-effort.
        pass
    return result
def delete_collection():
    """Delete a collection, its user-cards, and repoint the user's selection."""
    payload = request.json
    owner_id = payload['user_id']
    target_id = payload['collection_id']
    # Remember the selection BEFORE deleting, so we can detect if the
    # deleted collection was the active one.
    currently_selected = User.get(owner_id).selected_collection
    Collection.get(target_id).delete()
    UserCard.filter({
        'collection_id': target_id,
        'user_id': owner_id
    }).delete()
    if currently_selected == target_id:
        # Fall back to any remaining collection of this user.
        fallback = Collection.find_first({'user_id': owner_id}).get_id()
        User.get(owner_id).update({'selected_collection': fallback})
    return ''
def Indexing(self):
    '''
    IR Indexing Operations
    - Elimination of Stopwords
    Rebuilds the in-memory index for every loaded document: abstract terms
    (plus tag terms when global context is enabled) are stop-word filtered,
    stemmed, and stored back on the document via set_index().
    '''
    # Clear any previous index before re-indexing.
    DB._execute("DELETE from collection_index")
    print "Indexing is started..."
    tp = TextProcessor()
    Collection._load()
    Collection._load_tags()  # loading documents with PMID, tags and abstracts
    for doc in Collection._documents:
        index_list = []
        for term in doc.abstract:
            index_list.append(term)
        if GlobalVariables.global_context_activated:
            # Tags contribute to the index only in global-context mode.
            for term in doc.tag:
                index_list.append(term)
        index_list = tp.EliminateStopWords(index_list)
        index_list = tp.Stem(index_list)
        doc.set_index(index_list)
    print "Indexing is Done!"
def setUp(self):
    """Creates test user and specimens.

    Resets the schema, then seeds: one user (id 11), one specimen (id 12)
    with its taxonomy and details rows, and one collection (id 13) —
    all with fixed ids so tests can reference them directly.
    """
    db.drop_all()
    db.create_all()
    user1 = User.signup("tester1", "password1", None, None, None)
    user1id = 11  # fixed id so specimen/collection FKs below line up
    user1.id = user1id
    specimen1 = Specimen(
        link="https://i.imgur.com/pMkflKn.jpg",
        user_id=11,
    )
    specimen1id = 12
    specimen1.id = specimen1id
    specimen1taxonomy = Taxonomy(
        common_name="Red Oak",
        specimen_id=12,
        species="Quercus rubra",
        genus="Quercus",
        family="Fagaceae",
        order="Fagales",
        phylum="Tracheophyta",
        kingdom="Plantae",
        authorship="L.",
    )
    specimen1details = Details(
        specimen_id=12,
        date="3-12-2019",
        location="Rock Bridge State Park",
        habitat="NE-facing slope",
        county="Boone",
        state="Missouri",
        notes="No Notes",
    )
    collection1 = Collection(
        user_id=11,
        name="Test Collection",
        info="Here is some generic test text about this collection.",
    )
    collection1id = 13
    collection1.id = collection1id
    # user1 is committed implicitly via signup; only the rest are added here.
    db.session.add_all(
        [specimen1, specimen1taxonomy, specimen1details, collection1])
    db.session.commit()
def collections():
    """List the current user's collections (with quote counts) and create new ones.

    On a valid POST, creates the collection inside a transaction, flashing
    a message on a duplicate name. Otherwise renders the list with a
    per-collection quote count via LEFT OUTER JOIN + GROUP BY.
    """
    form = CollectionAddForm()
    if form.validate_on_submit():
        with db.atomic() as txn:
            try:
                Collection.create(
                    name=form.name.data,
                    user=current_user.get_id(),
                )
            except IntegrityError:
                # Unique (name, user) constraint violated.
                flash('A collection with that name already exists.')
        return redirect(url_for('collections'))
    else:
        collections = (Collection.select(
            Collection,
            fn.COUNT(QuoteCollection.id).alias('quote_count'),
        ).join(QuoteCollection, JOIN.LEFT_OUTER).group_by(Collection).where(
            Collection.user == current_user.get_id(), ))
        return render_template(
            'collections.html',
            form=form,
            collections=collections,
        )
def save_collections_from_sender_form(sender):
    """Sync collection membership with the form's 'collections' selection.

    For every available collection choice: add the instance's pk when the
    collection was selected and the pk is absent, remove it when it was
    deselected and the pk is present; save each collection either way.
    """
    form = sender
    obj_pk = str(form.instance['pk'])
    for choice in form.fields['collections'].choices:
        collection_pk = choice[0]
        collection = Collection.query().get(pk=collection_pk)
        collection_items = collection['items']
        if collection_pk in form.cleaned_data['collections'] and obj_pk not in collection_items:
            collection_items.append(obj_pk)
        elif collection_pk not in form.cleaned_data['collections'] and obj_pk in collection_items:
            collection_items.remove(obj_pk)
        collection['items'] = collection_items
        # NOTE(review): saves even when nothing changed — harmless but noisy.
        collection.save()
def startArticle():
    """Record a user's collection/favorite action on an article.

    Form fields: 'uid' (user id), 'id' (article id), 'type' ('0' creates a
    Collection row, anything else a Ucollection row). Responds with
    {'static': 1} on success, {'static': 0} on failure.
    (Renamed locals: `id` and `type` shadowed builtins.)
    """
    uid = request.form.get("uid")           # user id
    article_id = request.form.get("id")     # article id
    action_type = request.form.get("type")
    if (action_type == '0'):
        record = Collection(article_id=article_id, uid=uid)
    else:
        record = Ucollection(article_id=article_id, uid=uid)
    try:
        db.session.add(record)
        db.session.commit()
        return (jsonify({'static': 1}))
    except Exception as e:
        print(e)
        return (jsonify({'static': 0}))
def init(self):
    """Bootstrap the app context, logger, and this source's Collection row."""
    self.article = Article()
    app.config.from_pyfile('config.cfg')
    ctx = app.app_context()
    ctx.push()
    db.init_app(app)
    self.logger = self.getLogger()
    # get this collection
    self.collection = (db.session.query(Collection)
                       .filter(Collection.name == self.getSource())
                       .first())
    if self.collection is not None:
        return
    # First run for this source: create and persist its collection row.
    self.collection = Collection()
    self.collection.name = self.getSource()
    self.saveCollection(self.collection)
def scan_script_collection(directory):
    """Materialize every text/Python script in a collection onto local disk.

    Looks the collection up by path, streams each child resource's content,
    and — for plain-text/Python resources — writes it under
    script_directory and registers it in the `scripts` map with its
    trigger topic (taken from the resource's CDMI metadata).
    """
    logging.info('Scanning "{0}" for scripts'.format(directory))
    collection = Collection.find_by_path(directory)
    if collection is None:
        # FIX: the original left the "{0}" placeholder unformatted, so the
        # log line never showed which collection was missing.
        logging.warning(
            'There are no scripts to run because I am unable to find the collection "{0}" in the database.'.format(directory)
        )
        return
    resource_count = collection.get_child_resource_count()
    logging.info('{0} scripts found in collection "{1}"'.format(
        resource_count, directory))
    # TODO: Refactor this and combine it with the on_message() function.
    for resource in collection.get_child_resources():
        url = resource.url
        driver = drivers.get_driver(url)
        # Accumulate the streamed chunks in memory.
        script_contents = StringIO.StringIO()
        for chunk in driver.chunk_content():
            script_contents.write(chunk)
        trigger_topic = meta_cassandra_to_cdmi(resource.metadata)['topic']
        script = resource.name
        script_type = magic.from_buffer(script_contents.getvalue())
        script_full_path = os.path.join(script_directory, script)
        script_path = os.path.dirname(script_full_path)
        logger.debug('Script "{0}" is apparently of type "{1}"'.format(
            script, script_type))
        if script_type in ('Python script, ASCII text executable',
                           'ASCII text'):
            logger.info('{1} script "{0}" for topic "{2}" at "{3}"'.format(
                script, 'create', trigger_topic, script_full_path))
            if not os.path.exists(script_path):
                os.makedirs(script_path)
            with open(script_full_path, 'w') as f:
                f.write(script_contents.getvalue())
            scripts[script] = {
                'topic': trigger_topic,
                'path': script_full_path
            }
def _layout_css_class(layout):
    # Maps the client's layout name to its Bootstrap column classes;
    # returns None for unknown/missing layouts (leave the field untouched).
    return {
        'one': 'col-sm-6 col-sm-offset-3',
        'two': 'col-sm-6',
        'three': 'col-sm-4',
    }.get(layout)


def api_create_collection():
    """Create or update a collection from a JSON payload.

    With 'unique_id' in the payload, updates the existing collection
    (items replaced only when the requester is the creator); otherwise
    creates a new collection for g.user. Responds with the board URI.

    Fixes vs. original: the create branch used `==` instead of `=` when
    assigning the 'two' layout class (a no-op comparison), the layout
    mapping was duplicated in both branches, and a leftover Py2 debug
    `print` of the raw payload was removed.
    """
    data = request.get_json()
    if data.get('unique_id'):
        collection = Collection.query.filter_by(
            unique_id=data.get("unique_id")).first()
        collection.title = data.get('title', None)
        collection.is_public = data.get('is_public')
        collection.timestamp = datetime.utcnow()
        css = _layout_css_class(data.get('layout'))
        if css:
            collection.collection_layout = css
        if g.user == collection.creator:
            # Replace the item list wholesale with the submitted one.
            collection.collection_items.delete()
            if data.get('items'):
                for item in data.get('items'):
                    collection.append_child(
                        CollectionItem(parent=collection, content=str(item)))
                cache_request(data.get('items'))
            db.session.add(collection)
            db.session.commit()
    else:
        collection = Collection()
        user = g.user
        if user:
            collection.creator = user
            collection.title = data.get('title', None)
            collection.is_public = data.get('is_public')
            css = _layout_css_class(data.get('layout'))
            if css:
                collection.collection_layout = css
            for item in data["items"]:
                collection.append_child(
                    CollectionItem(parent=collection, content=str(item)))
            db.session.add(collection)
            db.session.commit()
    return jsonify(ok=True,
                   uri=str(url_for('board', unique_id=collection.unique_id)))
def scan_script_collection(directory):
    """Materialize every text/Python script in a collection onto local disk.

    Variant that resolves resources via Resource.find and reads the trigger
    topic from the resource's CDMI metadata. Writes matching scripts under
    script_directory and registers them in the `scripts` map.
    """
    logging.info('Scanning "{0}" for scripts'.format(directory))
    collection = Collection.find(directory)
    if collection is None:
        logging.warning(
            'There are no scripts to scan because I am unable '
            'to find the collection "{0}" in the database.'.format(directory))
        return
    # Only data objects (files) can be scripts; containers are ignored.
    child_container, child_dataobject = collection.get_child()
    resource_count = len(child_dataobject)
    logging.info('{0} scripts found in collection "{1}"'.format(
        resource_count, directory))
    # TODO: Refactor this and combine it with the on_message() function.
    for resource_name in child_dataobject:
        resource = Resource.find("{}/{}".format(directory, resource_name))
        # Accumulate streamed chunks in memory (Py2 StringIO).
        script_contents = StringIO.StringIO()
        for chunk in resource.chunk_content():
            script_contents.write(chunk)
        trigger_topic = resource.get_cdmi_metadata().get('topic', '')
        script = resource.name
        script_type = magic.from_buffer(script_contents.getvalue())
        script_full_path = os.path.join(script_directory, script)
        script_path = os.path.dirname(script_full_path)
        logger.debug('Script "{0}" is apparently of type "{1}"'.format(
            script, script_type))
        if script_type in ('Python script, ASCII text executable',
                           'ASCII text'):
            logger.info('{1} script "{0}" for topic "{2}" at "{3}"'.format(
                script, 'create', trigger_topic, script_full_path))
            if not os.path.exists(script_path):
                os.makedirs(script_path)
            with open(script_full_path, 'w') as f:
                f.write(script_contents.getvalue())
            scripts[script] = {
                'topic': trigger_topic,
                'path': script_full_path
            }
def retrive_documents(self, query_id):
    """BM25-score all documents against the expanded query.

    NOTE(review): the bare `return` right after _expand_query() makes
    EVERYTHING below it dead code — the whole scoring pass never runs.
    This looks like a temporary short-circuit left in during debugging;
    confirm before removing it.
    """
    # BM25 tuning parameters.
    k1 = 1.2
    k3 = 8.00
    avg_dl = 122
    b = 1  # from 0.25 to 2.00 increase 0.25
    q = Query(query_id)
    # q.set_concepts(self.QueryConceptExtraction(q.text))
    self._expand_query(q)
    return
    print "Retrieving Documents for: ", q.text
    Collection._load()
    Collection._load_go()
    Collection._load_tags()
    Collection._load_indexes()  # loads documents into _documents with PMID and index
    score = dict()
    N = Collection._count
    # Document frequency of each query term, precomputed once.
    Nt = dict()
    for term in q.text:
        Nt[term] = Collection._get_frequency(term)
    counter = 0
    for doc in Collection._documents:
        summation = 0
        dl = doc.length * 1.00
        for t in q.text:
            tfn = doc.get_frequency(t)
            QQ = ' '.join(q.text)
            qtf = Document._term_frequency(QQ, t)
            K = k1 * ((1 - b) + b * (dl / avg_dl))
            w = log((N - Nt[t] + 0.5) / (Nt[t] + 0.5), 2)
            if w < 0:
                # this makes the result a negative number
                # if we break the result will be bigger than or equal to zero
                break
            p1 = (((k1 + 1) * tfn) / (K + tfn))
            p2 = ((k3 + 1) * qtf / (k3 + qtf))
            p3 = w
            summation += p1 * p2 * p3
        score[doc.PMID] = summation
        counter += 1
def newcollection():
    """Create this year's collection for the current user's organization."""
    now = datetime.datetime.now()
    form = NewCollection()
    if request.method == 'POST' and form.validate_on_submit():
        existing = Collection.query.filter_by(year=now.year).first()
        if existing is not None:
            flash('Collection already exist!')
        else:
            record = Collection(colyear=form.colyear.data,
                                coldate=now,
                                col_orgCode=current_user.orgCode,
                                year=now.year)
            db.session.add(record)
            db.session.commit()
            flash(" Success! You have added a new collection!")
            return redirect(url_for('collection'))
    return render_template('addcollection.html', form=form)
def get(collection_id):
    """Return the collection if visible; presumably aborts 404 otherwise."""
    collection = Collection.get_visible_or_404(collection_id)
    return collection
def delete(collection_id):
    """Delete the collection and answer 204 No Content."""
    Collection.delete(collection_id)
    return '', 204
def post():
    """Create a collection from the request's JSON body."""
    attributes = get_request_json()
    return Collection.create(**attributes)
def get():
    """Return all collections matching the request's query params, as a list."""
    matches = Collection.get_all(request.values)
    return list(matches)
def get(collection_id):
    """List a collection's items; visibility check presumably 404s first."""
    Collection.get_visible_or_404(collection_id)
    items = Item.get_all(request.values, collection_id)
    return list(items)
def get(collection_id):
    """List a collection's comments; visibility check presumably 404s first."""
    Collection.get_visible_or_404(collection_id)
    comments = Comment.objects(collection=collection_id)
    return list(comments)
def get(user_id):
    """List one user's collections (404 if the user doesn't exist)."""
    User.objects.get_or_404(id=user_id)
    filters = MultiDict(request.values)
    filters.setlist('owner', (user_id,))  # force the owner filter
    matches = Collection.get_all(filters)
    return list(matches)
def save_collection_items_from_sender_form(sender):
    """Overwrite a collection's items with the form's cleaned selection."""
    record, created = Collection.query().get_or_create(name=sender.instance)
    record['items'] = sender.cleaned_data['items']
    record.save()
def _collection(self, obj, caller):
    """Render a collection's items one-per-line; empty string if missing."""
    try:
        stored = Collection.query().get(name=obj)["items"] or []
        return "\r\n".join(stored)
    except Collection.DoesNotExist:
        return ""
def patch(collection_id):
    """Apply the request's JSON body as a partial update to the collection."""
    changes = get_request_json()
    return Collection.patch(collection_id, **changes)
def DocumentExpantion(self):
    '''
    db.Query("delete from collection_concepts;")!!!
    BM25TermWeightingModel: BM25 (Best Match) calculates the weight of each
    word in each extracted concept for the document.

    For every document: each GO concept's variant terms are weighted with a
    BM25-style score; concepts whose mean term weight exceeds the threshold
    are stored (semicolon/comma separated) in collection_concepts, and the
    variant terms are attached to the document as tags.
    '''
    print "Calculating weights is started..."
    wieght_threshold = 0.10  # minimum mean term weight for a concept to count
    tp = TextProcessor()
    ontology = Ontology()
    db = DB()
    db.Query("delete from collection_concepts;")
    Collection._load()
    Collection._load_go()
    N = Collection._count
    # Terminologies are ('go','mesh','icd10','snomed') corresponding with
    # columns 2,3,4,5; only GO is loaded here.
    T = ontology.GetDict('go')  # bring all ontologies into memory to be faster!
    # BM25 tuning parameters.
    doc_avg_len = 122
    k1 = 1.2
    b = 1.00
    doc_counter = 0
    print Collection._count
    for d in Collection._documents:
        doc_counter += 1
        doc_len = d.length
        weight = dict()  # concept id -> aggregated weight
        for C in d.go:
            C = C.replace(' ', '')
            # Extract concept variants for C and split them into terms.
            var = ' '
            for variant in T[C]:
                var += ' {0} '.format(variant)
            terms = set(var.split(tp.WordSplitter()))
            tp.remove_values_from_list(terms, '')
            l = len(terms)
            sumation = 0
            for term in terms:
                term_weight = 0
                # calculate the weight
                tf = d.get_frequency(term)
                n_k = Collection._get_frequency(term)
                tf = d.get_frequency(term)  # NOTE(review): duplicate lookup
                try:
                    term_weight = tf * ((log10((N - n_k + 0.50) / (n_k + 0.50))) / (k1 + ((1 - b) + b) * (doc_len / doc_avg_len) + (tf)))
                except:
                    # Swallows math/division errors (e.g. log10 domain) and
                    # leaves term_weight at 0.
                    pass
                sumation += term_weight
            if (sumation / l) > wieght_threshold:
                weight[C] = (1.00 / l) * sumation
        # Store concepts and weights in the database; concepts and their
        # weights are semicolon separated, pairs comma separated.
        values = ''
        ConceptList = []
        for row in weight:
            row = row.replace(" ", '')
            for term in T[row]:
                ConceptList.append(term)
            if values == '':
                values = str(row) + ';' + str(weight[row])
            else:
                values += ',' + str(row) + ';' + str(weight[row])
        d.set_tag(ConceptList)  # adding tag terms to documents
        # NOTE(review): SQL built by string formatting — injection-prone.
        query = 'Insert into collection_concepts (PMID, Concepts) values({0}, "{1}")'.format(d.PMID, values)
        db.Query(query)
    print "Calculating weights is Done! Concepts are added to Database"
def _expand_query(self, q):
    """Pseudo-relevance-feedback query expansion (BM25 ranking + Bo1 weights).

    Step 1 BM25-ranks all indexed documents for q and keeps the top M;
    Step 2 weights each candidate term with a Bo1-style information score
    blended with its normalized query frequency, returning the terms whose
    weight exceeds 0.25 as the expanded query QPrime.
    """
    # --STEP 1----------Extract TOP DOCUMENTS----------------------------
    tp = TextProcessor()
    param = Parameter()
    # BM25 tuning parameters.
    k1 = 1.2
    k3 = 8.00
    avg_dl = 122
    b = 1  # from 0.25 to 2.00 increase 0.25
    Collection._load_indexes()  # loads indexes into _documents
    N = len(Collection._documents)
    score = dict()
    for D in Collection._documents:
        summation = 0
        dl = D.length * 1.00
        for t in q.text:
            Nt = Collection._get_frequency(t)
            tfn = D.get_frequency(t)
            qtf = q.get_frequency(t)
            K = k1 * ((1 - b) + b * (dl / avg_dl))
            w = log((N - Nt + 0.5) / (Nt + 0.5), 2)
            if w < 0:
                # this makes the result a negative number;
                # if we break the result will be >= zero
                break
            p1 = (((k1 + 1) * tfn) / (K + tfn))
            p2 = ((k3 + 1) * qtf / (k3 + qtf))
            p3 = w
            summation += p1 * p2 * p3
        score[D.PMID] = summation
    M = param.GetDocNumberForLocalContext()  # how many top docs to keep
    TopDocs = []
    TopNums = []
    new_score = dict()
    # Keep only positively-scored documents for plotting.
    for item in score.iterkeys():
        if score[item] > 0:
            new_score[item] = score[item]
    for i in range(M):
        TopNums.append(0)
        TopDocs.append('')
    # Manual insertion into a fixed-size top-M list (insertion-sort style).
    for D in score.iterkeys():
        for i in range(M):
            if score[D] > TopNums[i]:
                for j in range(M - i - 1):
                    TopDocs[M - j - 1] = TopDocs[M - j - 2]
                    TopNums[M - j - 1] = TopNums[M - j - 2]
                TopDocs[i] = D
                TopNums[i] = score[D]
                break
    Display._plot(new_score, q)
    # NOTE(review): TopDocsTexts starts empty and is never populated from
    # TopDocs before tokenization — Step 2 therefore iterates over nothing.
    # Looks like the top-document text concatenation is missing; confirm.
    TopDocsTexts = ''
    TopDocsTexts = tp.Tokenize(TopDocsTexts)
    TopDocsTexts = TextProcessor._remove_stop_words(TopDocsTexts)
    # ---STEP 2---------Calculate weight of each term which is a member of
    # the new query----------------------------
    K = TopDocsTexts
    Beta = 0.4
    weight = dict()
    MaxTFQ = 0.001  # avoids division by zero when no term occurs in q
    for term in TopDocsTexts:
        tfq = q.get_frequency(term)
        if tfq > MaxTFQ:
            MaxTFQ = tfq
    tfqN = 0
    MaxInfo = 0
    # First pass: find the maximum Bo1 information score for normalization.
    for term in TopDocsTexts:
        Lambda = Document._term_frequency(' '.join(K), term)
        Freq_t_k = Document._term_frequency(' '.join(K), term)
        log1 = log(1.00 / (1.00 + Lambda), 2)
        log2 = log(Lambda / (1.00 + Lambda), 2)
        InfoBO1 = -log1 - Freq_t_k * log2
        if InfoBO1 > MaxInfo:
            MaxInfo = InfoBO1
    # Second pass: blend normalized query tf with normalized Bo1 score.
    for term in TopDocsTexts:
        Lambda = Document._term_frequency(' '.join(K), term)
        Freq_t_k = Document._term_frequency(' '.join(K), term)
        log1 = log(1.00 / (1.00 + Lambda), 2)
        log2 = log(Lambda / (1.00 + Lambda), 2)
        InfoBO1 = -log1 - Freq_t_k * log2
        tfq = q.get_frequency(term)
        tfqN = (tfq + 0.00) / MaxTFQ
        if MaxInfo > 0:
            weight[term] = tfqN + Beta * (InfoBO1 / MaxInfo)
        else:
            weight[term] = 0
    # Terms above the fixed threshold form the expanded query.
    QPrime = []
    for term in weight.iterkeys():
        if weight[term] > 0.25:
            QPrime.append(term)
    return QPrime