def main():
    """Command-line entry point: obtain the index, run one query, print it.

    The positional ``query`` argument is handed to ``QueryManager.process``
    and the outcome is shown with ``printResult``.

    Flags:
        -b/--build: build the index from ``PATH_TO_DOCUMENTS`` instead of
            loading it from ``PATH_TO_DICTIONARY``.
        -s/--save: write the index to ``PATH_TO_DICTIONARY``.
        -t/--time: print how long loading/building, saving (only when
            ``--save`` was given) and the query itself took, in ms.
    """
    parser = argparse.ArgumentParser(
        description='Inverted index to search in documents.txt file.')
    parser.add_argument('query', type=str, help='Query to be processed.')
    parser.add_argument('-b', '--build', action="store_true",
                        help='Build the dictionary.')
    # --save is a plain switch: it takes no file name, the target path is
    # the PATH_TO_DICTIONARY constant.
    parser.add_argument('-s', '--save', action="store_true",
                        help='Save the dictionary to the dictionary file.')
    parser.add_argument('-t', '--time', action="store_true",
                        help='Print execution time.')
    args = parser.parse_args()

    load_started = timeit.default_timer()
    if args.build:
        _announce("\nBuilding dictionary ...")
        dictionary = PositionalInvertedIndex(PATH_TO_DOCUMENTS)
        dictionary.setStopWords(STOP_WORDS)
        dictionary.build()
        print("Done.")
    else:
        _announce("\nLoading dictionary ...")
        dictionary = loadDictionary(PATH_TO_DICTIONARY)
        print("Done.")
    load_finished = timeit.default_timer()

    if args.save:
        save_started = timeit.default_timer()
        _announce("Saving ...")
        dictionary.save(PATH_TO_DICTIONARY)
        print("Done.")
        save_finished = timeit.default_timer()

    print()
    queryManager = QueryManager(dictionary)
    query_started = timeit.default_timer()
    result = queryManager.process(args.query)
    query_finished = timeit.default_timer()
    printResult(result)

    if args.time:
        print("\nExecution time to load (or build) the dictionary: {:.3f}ms".
              format((load_finished - load_started) * 1000))
        # The save timers only exist when --save was requested.
        if args.save:
            print("Execution time to save the dictionary: {:.3f}ms".format(
                (save_finished - save_started) * 1000))
        print("Execution time for the request: {:.3f}ms".format(
            (query_finished - query_started) * 1000))


def _announce(message):
    """Print a progress message without a newline and flush it right away.

    The message is followed by a space instead of a newline so that the
    later "Done." lands on the same line.  Without the explicit flush the
    text can sit in the stdout buffer until that newline is written, i.e.
    until the announced step is already over.
    """
    import sys  # local import: the file's import block is not visible here

    print(message, end=' ')
    sys.stdout.flush()
def __init__(self, q):
    """Wrap ``q`` in a QueryManager and cache per-target information.

    For every target name reported by the query manager this stores the
    model info, protein info, aptamer info and protein-specificity info,
    each in a list ordered like ``self.target_names``.
    """
    manager = QueryManager(q)
    self.qm = manager
    self.num_targets = manager.get_num_targets()

    names = manager.get_target_names()
    self.target_names = names

    # Each model info entry is: m_method, m_type, top_k, bp, n_iter
    self.model_infos = [manager.get_model_info(name) for name in names]
    self.p_infos = [manager.get_protein_info(name) for name in names]
    self.a_infos = [manager.get_aptamer_info(name) for name in names]
    self.ps_infos = [
        manager.get_protein_specificity_info(name) for name in names
    ]

    self.n_jobs = manager.get_num_jobs()
def search(request, ref=None):
    """Render ``search.tpl``, running a SPARQL name/label search on POST.

    On a valid POSTed ``SearchForm`` the ``searchText`` value is matched
    exactly against FOAF name properties and SKOS labels (bound to ``?hit``)
    and as a case-insensitive regex against SKOS labels (bound to
    ``?regexhit``).  Each result URI is described through the query manager
    and its first ``skos:prefLabel`` value is stored under the last path
    segment of the URI.

    Args:
        request: the incoming HTTP request.
        ref: unused; kept so existing callers keep working.

    Returns:
        The response rendering ``search.tpl`` with a fresh, unbound form,
        ``hits`` (exact matches) and ``rhits`` (regex-only matches).
    """
    search_form = SearchForm()
    hits = {}
    rhits = {}
    if request.method == 'POST':
        form = SearchForm(request.POST)
        if form.is_valid():
            queryManager = QueryManager()
            # The text comes straight from the user: escape it so it cannot
            # terminate the quoted literal and alter the query.
            query = _SEARCH_QUERY_TEMPLATE % {
                's': _escape_sparql_literal(form.cleaned_data['searchText'])}
            r = queryManager.query(query)
            for person in r['results']['bindings']:
                # A binding carries either ?hit or ?regexhit.
                try:
                    uri = person['hit']['value']
                    level = 'hit'
                except (KeyError, TypeError):
                    uri = person['regexhit']['value']
                    level = 'regex'
                uristr = str(uri)
                desc = queryManager.describe(uristr)
                suri = uristr.split('/')[-1]
                try:
                    label = desc[_PREF_LABEL][0][u'value']
                except (KeyError, IndexError, TypeError):
                    # Best effort: resources without a usable prefLabel are
                    # simply left out of the result lists.
                    continue
                if level == 'hit':
                    hits[suri] = label
                elif suri not in hits:
                    # Do not list something as a regex match when it was
                    # already recorded as an exact match.
                    rhits[suri] = label
    return render_to_response(
        "search.tpl", {'form': search_form, 'hits': hits, 'rhits': rhits})


# Predicate whose first value is used as the display label of a result.
_PREF_LABEL = u'http://www.w3.org/2004/02/skos/core#prefLabel'

# SPARQL query; ``%(s)s`` is replaced by the escaped search text.
_SEARCH_QUERY_TEMPLATE = (
    "SELECT ?hit, ?regexhit WHERE {"
    "{?hit <http://xmlns.com/foaf/0.1/familyName> '%(s)s' .} UNION "
    "{?hit <http://xmlns.com/foaf/0.1/givenName> '%(s)s' .} UNION "
    "{?hit <http://xmlns.com/foaf/0.1/name> '%(s)s' .} UNION "
    "{?regexhit <http://www.w3.org/2004/02/skos/core#prefLabel> ?name "
    "FILTER regex(?name, '%(s)s', \"i\")} UNION "
    "{?regexhit <http://www.w3.org/2004/02/skos/core#altLabel> ?name "
    "FILTER regex(?name, '%(s)s', \"i\")} UNION "
    "{?hit <http://www.w3.org/2004/02/skos/core#prefLabel> '%(s)s' .} UNION "
    "{?hit <http://www.w3.org/2004/02/skos/core#altLabel> '%(s)s' .} UNION "
    "{?regexhit <http://www.w3.org/2004/02/skos/core#hiddenLabel> ?name "
    "FILTER regex(?name, '%(s)s', \"i\")} UNION "
    "{?hit <http://www.w3.org/2004/02/skos/core#hiddenLabel> '%(s)s' }} "
    "LIMIT 100"
)


def _escape_sparql_literal(text):
    """Escape ``text`` for use inside a single-quoted SPARQL string literal.

    Backslashes are doubled first so the escapes added afterwards are not
    escaped again; quotes and line breaks are then replaced by their
    backslash escape sequences.
    """
    return (text.replace('\\', '\\\\')
                .replace("'", "\\'")
                .replace('\n', '\\n')
                .replace('\r', '\\r'))
def __new__(cls, name, bases, dct):
    """Create the class, set its endpoint root and attach a query manager.

    After the new class object is built, ``set_endpoint_root()`` is called
    on it and a ``QueryManager`` bound to it is stored as its ``Query``
    attribute.
    """
    new_class = super(ObjectMetaclass, cls).__new__(cls, name, bases, dct)
    new_class.set_endpoint_root()
    new_class.Query = QueryManager(new_class)
    return new_class
# NOTE(review): the definitions below use ``self`` and ``@staticmethod`` and
# so appear to be the body of the ``User`` class, whose header (and any
# decorator on ``login``) lies outside this excerpt -- confirm the nesting
# before merging.
def login(username, passwd):
    """Log in with a user name and password and return the ``User``.

    Issues a GET to ``<API_ROOT>/login`` and builds the user from the
    returned fields.
    """
    login_url = '/'.join([API_ROOT, 'login'])
    return User(**User.GET(login_url, username=username, password=passwd))


@staticmethod
def login_auth(auth):
    """Log in with third-party ``authData`` and return the ``User``.

    POSTs ``auth`` as ``authData`` to the user endpoint root.
    """
    login_url = User.ENDPOINT_ROOT
    return User(**User.POST(login_url, authData=auth))


@staticmethod
def request_password_reset(email):
    """Ask the server to start the password reset process for ``email``.

    Returns:
        True when the POST to ``<API_ROOT>/requestPasswordReset`` completed
        without raising, False when it raised an exception.
    """
    url = '/'.join([API_ROOT, 'requestPasswordReset'])
    try:
        User.POST(url, email=email)
    except Exception:
        # Any request failure is reported as False; KeyboardInterrupt and
        # SystemExit are deliberately allowed to propagate.
        return False
    return True


def _to_native(self):
    """Return the editable attributes converted to their Parse form.

    Every value is passed through ``ParseType.convert_to_parse`` with
    ``as_pointer=True``.
    """
    return {
        key: ParseType.convert_to_parse(value, as_pointer=True)
        for key, value in self._editable_attrs.items()
    }


def __repr__(self):
    """Return a short representation showing the user name and object id."""
    return '<User:%s (Id %s)>' % (self.username, self.objectId)


User.Query = QueryManager(User)
class Generator():
    """Generate candidate aptamers for every target of a QueryManager.

    The per-target settings are read once in ``__init__``.  ``generate``
    then processes the targets in batches of ``n_jobs``, running one child
    process per target, and stores the candidates back through the query
    manager.
    """

    # Generative methods that ``generate`` knows how to run.
    _KNOWN_METHODS = ("Apta-MCTS", "Lee_and_Han_2019")

    def __init__(self, q):
        """Wrap ``q`` in a QueryManager and cache per-target information."""
        self.qm = QueryManager(q)
        self.num_targets = self.qm.get_num_targets()
        self.target_names = self.qm.get_target_names()
        # Each model info entry is: m_method, m_type, top_k, bp, n_iter
        self.model_infos = [
            self.qm.get_model_info(t) for t in self.target_names
        ]
        self.p_infos = [self.qm.get_protein_info(t) for t in self.target_names]
        # Collected for completeness; not read by the methods of this class.
        self.a_infos = [self.qm.get_aptamer_info(t) for t in self.target_names]
        self.ps_infos = [
            self.qm.get_protein_specificity_info(t) for t in self.target_names
        ]
        self.n_jobs = self.qm.get_num_jobs()

    def generate(self):
        """Run the configured generative model for every target.

        Targets are handled ``n_jobs`` at a time.  Each target gets its own
        child process that appends ``(target name, candidates)`` to a list
        shared through a multiprocessing manager; after the batch has been
        joined the candidates are recorded with ``set_candidate_info``.
        ``update_and_reload`` is called once all batches are done.

        Raises:
            ValueError: if any target names a generative method other than
                "Apta-MCTS" or "Lee_and_Han_2019".  This is checked before
                any process is started so that a misconfigured target does
                not abort the run halfway through.
        """
        for model_info in self.model_infos:
            self._check_method(model_info[0])

        with Manager() as manager:
            for i in range(0, self.num_targets, self.n_jobs):
                # The final batch takes everything that is left.
                if i + self.n_jobs >= self.num_targets:
                    batch = slice(i, None)
                else:
                    batch = slice(i, i + self.n_jobs)
                model_infos = self.model_infos[batch]
                p_infos = self.p_infos[batch]
                t_names = self.target_names[batch]
                ps_infos = self.ps_infos[batch]

                L = manager.list()  # results shared with the children
                processes = []
                for t_name, model_info, p_info, ps_info in zip(
                        t_names, model_infos, p_infos, ps_infos):
                    method, score_function, top_k, bp, n_iter = model_info
                    p_name, p_seq = p_info
                    ps_names, ps_seqs = ps_info

                    print("> Target task name is {}".format(t_name))
                    print("- target protein is {}".format(p_name))
                    print("- generative model {} with length {} bp".format(
                        method, bp))
                    print("- score function : {}".format(score_function))
                    print("- generative model will save top {} candidates".
                          format(top_k))
                    print(
                        "- (when model is Apta-MCTS, number of iteration is {})"
                        .format(n_iter))
                    print(
                        "- proteins for checking binding specificity (#proteins: {})"
                        .format(len(ps_names)))
                    print("- target protein sequence")
                    print_string_multilines(p_seq, 70)
                    print("")

                    if method == "Apta-MCTS":
                        p = Process(target=self.apta_mcts,
                                    args=(L, t_name, p_seq, score_function,
                                          bp, top_k, n_iter, ps_names,
                                          ps_seqs))
                    elif method == "Lee_and_Han_2019":
                        p = Process(target=self.leeandhan2019,
                                    args=(L, t_name, p_seq, score_function,
                                          top_k))
                    else:
                        # Only reachable if model_infos changed after the
                        # up-front validation.
                        self._check_method(method)
                        raise ValueError(
                            "unknown generative method {!r}".format(method))
                    p.start()
                    processes.append(p)

                for p in processes:
                    p.join()

                for t_name, candidates in L:
                    self.qm.set_candidate_info(t_name, candidates)

        self.qm.update_and_reload()

    def _check_method(self, method):
        """Raise ValueError when ``method`` is not a known generative method."""
        if method not in self._KNOWN_METHODS:
            raise ValueError(
                "unknown generative method {!r}; expected one of {}".format(
                    method, ", ".join(self._KNOWN_METHODS)))

    def apta_mcts(self, L, t_name, p_seq, score_function, bp, k, n_iter,
                  ps_names, ps_seqs):
        """Sample candidates with Apta_MCTS and append them to ``L``.

        Used as the target of a child process by ``generate``.  The names
        and sequences of the specificity proteins are passed to the sampler
        as one pair so that binding specificity is taken into account.
        The result is appended to ``L`` as ``(t_name, candidates)``.
        """
        G = Apta_MCTS(score_function)
        p_spes = (ps_names, ps_seqs)
        candidate_aptamers = G.sampling(p_seq, bp, k, n_iter, p_spes)
        # Results go through the shared list; the query manager is updated
        # by ``generate`` once the batch has been joined.
        L.append((t_name, candidate_aptamers))

    def leeandhan2019(self, L, t_name, p_seq, score_function, k):
        """Sample candidates with RandomHeuristicSampling, append to ``L``.

        Used as the target of a child process by ``generate``.  The number
        of pre-samples and the aptamer length are fixed for this method.
        The result is appended to ``L`` as ``(t_name, candidates)``.
        """
        # fixed parameters
        n_samples, bp = 6000000, 27
        G = RandomHeuristicSampling(score_function)
        G.pre_sampling(n_samples, self.n_jobs, bp)
        candidate_aptamers = G.post_sampling(p_seq, k)
        # Results go through the shared list; the query manager is updated
        # by ``generate`` once the batch has been joined.
        L.append((t_name, candidate_aptamers))
qta_indexer.save_articles(path=output_path) else: qta_indexer.load_articles(path=output_path) qta_indexer.inverse_article_frequency() for title, article in qta_indexer.article_list.items(): num_query = len(article.query_list) num_paragraph = len(article.paragraph_list) if num_paragraph >= 20: print('%-32s -> Queries: %3d, Paragraphs: %3d' % (article.title, num_query, num_paragraph)) # Baseline Query Search ############################################################################################ query_manager = QueryManager(qta_indexer=qta_indexer, min_paragraph_number=20, top_document_number=10, keyword_number=50) save_search_engine(search_engine=query_manager.search_engine, path=output_path) print('Corpus size :', len(query_manager.corpus)) if initial_run: query_manager.search_queries() query_manager.save_queries(path=output_path) else: query_manager.load_queries(path=output_path) query_manager.clear_query_list(min_precision=0.2, min_recall=0.01) query_list = query_manager.query_list precisions = np.zeros(shape=(len(query_list), )) recalls = np.zeros(shape=(len(query_list), )) for i, query in enumerate(query_list):
return None return u @staticmethod def request_password_reset(email): '''Trigger Parse\'s Password Process. Return True/False indicate success/failure on the request''' url = '/'.join([API_ROOT, 'requestPasswordReset']) try: User.POST(url, email=email) return True except: return False def _to_native(self): return dict([(k, ParseType.convert_to_parse(v, as_pointer=True)) for k, v in self._editable_attrs.items()]) def __repr__(self): return '<User:%s (Id %s)>' % (self.username, self.objectId) User.Query = QueryManager(User) class Role(datatypes.Object): ENDPOINT_ROOT = '/'.join([API_ROOT, 'roles']) parse_table = '_Role' Role.Query = QueryManager(Role)
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from connection import API_ROOT
from datatypes import ParseResource
from query import QueryManager


class Installation(ParseResource):
    """Resource rooted at ``<API_ROOT>/installations``."""

    ENDPOINT_ROOT = '/'.join([API_ROOT, 'installations'])


class Push(ParseResource):
    """Resource rooted at ``<API_ROOT>/push`` with helpers to send pushes."""

    ENDPOINT_ROOT = '/'.join([API_ROOT, 'push'])

    @classmethod
    def _send(cls, data, where=None, **kw):
        """POST ``data`` to the push endpoint and return the POST result.

        A truthy ``where`` is forwarded as the ``where`` keyword together
        with any other keyword arguments.
        """
        options = dict(kw, where=where) if where else kw
        return cls.POST('', data=data, **options)

    @classmethod
    def alert(cls, data, where=None, **kw):
        """Send the payload ``data`` as a push; returns nothing."""
        cls._send(data, where=where, **kw)

    @classmethod
    def message(cls, message, where=None, **kw):
        """Send ``message`` as the ``alert`` text of a push; returns nothing."""
        payload = {'alert': message}
        cls._send(payload, where=where, **kw)


Installation.Query = QueryManager(Installation)