Example #1
0
def main():
    """Command-line entry point: obtain the index, run one query, print it.

    The dictionary is built from PATH_TO_DOCUMENTS when ``--build`` is given
    and loaded from PATH_TO_DICTIONARY otherwise.  ``--save`` writes it to
    PATH_TO_DICTIONARY, and ``--time`` prints the elapsed time of each phase
    in milliseconds after the result has been printed.
    """
    cli = argparse.ArgumentParser(
        description='Inverted index to search in documents.txt file.')
    cli.add_argument('query', type=str, help='Query to be process.')
    # All optional switches are plain booleans; register them in one pass.
    switches = (
        ('-b', '--build', 'Build the dictionary.'),
        ('-s', '--save', 'Save the dictionary in the given file SAVE.'),
        ('-t', '--time', 'Print execution time.'),
    )
    for short_flag, long_flag, description in switches:
        cli.add_argument(short_flag,
                         long_flag,
                         action="store_true",
                         help=description)
    args = cli.parse_args()

    # Phase 1: build the dictionary from the documents, or load a saved one.
    load_begin = timeit.default_timer()
    if not args.build:
        print("\nLoading dictionary ...", end=' ')
        dictionary = loadDictionary(PATH_TO_DICTIONARY)
    else:
        print("\nBuilding dictionary ...", end=' ')
        dictionary = PositionalInvertedIndex(PATH_TO_DOCUMENTS)
        dictionary.setStopWords(STOP_WORDS)
        dictionary.build()
    print("Done.")
    load_end = timeit.default_timer()

    # Phase 2 (optional): persist the dictionary.
    if args.save:
        save_begin = timeit.default_timer()
        print("Saving ...", end=' ')
        dictionary.save(PATH_TO_DICTIONARY)
        print("Done.")
        save_end = timeit.default_timer()

    print()

    # Phase 3: answer the query; only the processing call itself is timed.
    manager = QueryManager(dictionary)
    query_begin = timeit.default_timer()
    result = manager.process(args.query)
    query_end = timeit.default_timer()

    printResult(result)

    if not args.time:
        return

    print("\nExecution time to load (or build) the dictionary: {:.3f}ms".
          format((load_end - load_begin) * 1000))
    if args.save:
        print("Execution time to save the dictionary: {:.3f}ms".format(
            (save_end - save_begin) * 1000))
    print("Execution time for the request: {:.3f}ms".format(
        (query_end - query_begin) * 1000))
Example #2
0
 def __init__(self, q):
     """Wrap ``q`` in a QueryManager and cache the per-target settings.

     Every ``*_infos`` list holds one entry per name in ``target_names``,
     in the same order.
     """
     manager = QueryManager(q)
     self.qm = manager
     self.num_targets = manager.get_num_targets()
     names = manager.get_target_names()
     self.target_names = names
     # Each model info is: m_method, m_type, top_k, bp, n_iter
     self.model_infos = [manager.get_model_info(name) for name in names]
     self.p_infos = [manager.get_protein_info(name) for name in names]
     self.a_infos = [manager.get_aptamer_info(name) for name in names]
     self.ps_infos = [
         manager.get_protein_specificity_info(name) for name in names
     ]
     self.n_jobs = manager.get_num_jobs()
def _escape_sparql_literal(text):
    """Return ``text`` escaped for use inside a single-quoted SPARQL literal."""
    # Backslash goes first so the escapes added afterwards are not re-escaped.
    for raw, escaped in (('\\', '\\\\'), ("'", "\\'"),
                         ('\n', '\\n'), ('\r', '\\r')):
        text = text.replace(raw, escaped)
    return text


def search(request, ref=None):
    """Render the search page and, on a valid POST, the matching resources.

    The submitted ``searchText`` is matched exactly against FOAF name and
    SKOS label predicates (``?hit``) and as a case-insensitive regex against
    the SKOS labels (``?regexhit``).  Every matching URI is described and its
    first ``skos:prefLabel`` value is stored under the last path segment of
    the URI.

    Args:
        request: the incoming HTTP request.
        ref: unused by this view.

    Returns:
        The rendered ``search.tpl`` response with a fresh ``form``, the exact
        matches in ``hits`` and the regex-only matches in ``rhits``.
    """
    pref_label = u'http://www.w3.org/2004/02/skos/core#prefLabel'
    search_form = SearchForm()
    hits = {}
    rhits = {}
    if request.method == 'POST':
        form = SearchForm(request.POST)
        if form.is_valid():
            queryManager = QueryManager()

            # The text comes straight from the user: escape it so a quote or
            # backslash cannot terminate the literal and rewrite the query.
            search_text = _escape_sparql_literal(
                form.cleaned_data['searchText'])

            query = """SELECT ?hit, ?regexhit WHERE 
            {{?hit <http://xmlns.com/foaf/0.1/familyName> '%(s)s' .} UNION 
             {?hit <http://xmlns.com/foaf/0.1/givenName> '%(s)s' .} UNION 
             {?hit <http://xmlns.com/foaf/0.1/name> '%(s)s' .} UNION 
             {?regexhit <http://www.w3.org/2004/02/skos/core#prefLabel> ?name FILTER regex(?name, '%(s)s', "i")} UNION 
             {?regexhit <http://www.w3.org/2004/02/skos/core#altLabel> ?name FILTER regex(?name, '%(s)s', "i")} UNION 
             {?hit <http://www.w3.org/2004/02/skos/core#prefLabel> '%(s)s' .} UNION
             {?hit <http://www.w3.org/2004/02/skos/core#altLabel> '%(s)s' .} UNION
             {?regexhit <http://www.w3.org/2004/02/skos/core#hiddenLabel> ?name FILTER regex(?name, '%(s)s', "i")} UNION 
             {?hit <http://www.w3.org/2004/02/skos/core#hiddenLabel> '%(s)s'  }}
             LIMIT 100""" % {'s': search_text}

            r = queryManager.query(query)

            for person in r['results']['bindings']:
                # A binding carries either ?hit or ?regexhit; prefer ?hit.
                level = 'hit'
                try:
                    uri = person['hit']['value']
                except (KeyError, TypeError):
                    uri = person['regexhit']['value']
                    level = 'regex'
                uristr = str(uri)

                desc = queryManager.describe(uristr)
                suri = uristr.split('/')[-1]
                try:
                    label = desc[pref_label][0][u'value']
                except (KeyError, IndexError, TypeError):
                    # No usable prefLabel in the description: skip the entry.
                    continue
                if level == 'hit':
                    hits[suri] = label
                elif suri not in hits:
                    rhits[suri] = label

    return render_to_response("search.tpl", {'form':search_form, 'hits': hits, 'rhits': rhits})
Example #4
0
 def __new__(cls, name, bases, dct):
     """Create the class, set its endpoint root and attach a QueryManager."""
     created = super(ObjectMetaclass, cls).__new__(cls, name, bases, dct)
     created.set_endpoint_root()
     # Every class produced by this metaclass gets its own ``Query`` manager.
     created.Query = QueryManager(created)
     return created
Example #5
0
    def login(username, passwd):
        """GET the ``login`` endpoint and build a User from the response.

        The credentials are sent as the ``username`` and ``password``
        parameters; the mapping returned by ``User.GET`` is expanded into the
        ``User`` constructor.
        """
        response = User.GET('/'.join([API_ROOT, 'login']),
                            username=username,
                            password=passwd)
        return User(**response)

    @staticmethod
    def login_auth(auth):
        """POST ``auth`` as ``authData`` to the User endpoint; return a User.

        The mapping returned by ``User.POST`` is expanded into the ``User``
        constructor.
        """
        response = User.POST(User.ENDPOINT_ROOT, authData=auth)
        return User(**response)

    @staticmethod
    def request_password_reset(email):
        """Trigger Parse's password reset process for ``email``.

        POSTs the address to the ``requestPasswordReset`` endpoint under
        API_ROOT.

        Returns:
            True if the request completed without raising, False if it
            raised an error.
        """
        url = '/'.join([API_ROOT, 'requestPasswordReset'])
        try:
            User.POST(url, email=email)
        except Exception:
            # Best-effort contract: report failure instead of raising.
            # KeyboardInterrupt and SystemExit are deliberately not caught.
            return False
        return True

    def _to_native(self):
        """Map each editable attribute name to its Parse-converted value.

        Values are converted with ``ParseType.convert_to_parse`` using
        ``as_pointer=True``.
        """
        return {
            key: ParseType.convert_to_parse(value, as_pointer=True)
            for key, value in self._editable_attrs.items()
        }

    def __repr__(self):
        """Return a short debug string with the username and object id."""
        identity = (self.username, self.objectId)
        return '<User:%s (Id %s)>' % identity


# Give User a class-level ``Query`` attribute: a QueryManager constructed
# with the User class itself.
User.Query = QueryManager(User)
Example #6
0
class Generator():
    """Run the configured generative model for every target task.

    Target settings are read through a ``QueryManager`` built from ``q``.
    ``generate`` works through the targets in batches of ``n_jobs``, running
    one worker process per target, and writes the produced candidates back
    through the same ``QueryManager``.
    """

    # Generative methods that ``generate`` knows how to dispatch.
    _SUPPORTED_METHODS = ("Apta-MCTS", "Lee_and_Han_2019")

    def __init__(self, q):
        """Wrap ``q`` in a QueryManager and cache the per-target settings.

        Every ``*_infos`` list holds one entry per name in ``target_names``,
        in the same order.
        """
        self.qm = QueryManager(q)
        self.num_targets = self.qm.get_num_targets()
        self.target_names = self.qm.get_target_names()
        self.model_infos = [
            self.qm.get_model_info(t) for t in self.target_names
        ]  # m_method, m_type, top_k, bp, n_iter
        self.p_infos = [self.qm.get_protein_info(t) for t in self.target_names]
        self.a_infos = [self.qm.get_aptamer_info(t) for t in self.target_names]
        self.ps_infos = [
            self.qm.get_protein_specificity_info(t) for t in self.target_names
        ]
        self.n_jobs = self.qm.get_num_jobs()

    def _check_methods(self):
        """Raise ValueError if any target names an unsupported method."""
        for t_name, model_info in zip(self.target_names, self.model_infos):
            method = model_info[0]
            if method not in self._SUPPORTED_METHODS:
                raise ValueError(
                    "unsupported generative method {!r} for target {!r}; "
                    "expected one of: {}".format(
                        method, t_name, ", ".join(self._SUPPORTED_METHODS)))

    def generate(self):
        """Generate candidates for all targets, ``n_jobs`` targets at a time.

        For each batch, one process is started per target, all of them are
        joined, the collected ``(target name, candidates)`` pairs are passed
        to ``qm.set_candidate_info`` and ``qm.update_and_reload()`` is called.

        Raises:
            ValueError: if a target is configured with a method other than
                "Apta-MCTS" or "Lee_and_Han_2019".  This is checked before
                any worker is started, so no process is left running.
        """
        self._check_methods()

        with Manager() as manager:
            for i in range(0, self.num_targets, self.n_jobs):
                # The last batch takes everything that remains.
                end = i + self.n_jobs
                if end >= self.num_targets:
                    end = None
                batch = slice(i, end)
                model_infos = self.model_infos[batch]
                p_infos = self.p_infos[batch]
                t_names = self.target_names[batch]
                ps_infos = self.ps_infos[batch]

                # Workers append their results here; the parent reads the
                # list once every worker of the batch has been joined.
                L = manager.list()
                processes = []
                for t_name, model_info, p_info, ps_info in zip(
                        t_names, model_infos, p_infos, ps_infos):
                    method, score_function, top_k, bp, n_iter = model_info
                    p_name, p_seq = p_info
                    ps_names, ps_seqs = ps_info

                    print("> Target task name is {}".format(t_name))
                    print("- target protein is {}".format(p_name))
                    print("- generative model {} with length {} bp".format(
                        method, bp))
                    print("- score function : {}".format(score_function))
                    print("- generative model will save top {} candidates".
                          format(top_k))
                    print(
                        "- (when model is Apta-MCTS, number of iteration is {})"
                        .format(n_iter))
                    print(
                        "- proteins for checking binding specificity (#proteins: {})"
                        .format(len(ps_names)))
                    print("- target protein sequence")
                    print_string_multilines(p_seq, 70)
                    print("")

                    if method == "Apta-MCTS":
                        p = Process(target=self.apta_mcts,
                                    args=(L, t_name, p_seq, score_function, bp,
                                          top_k, n_iter, ps_names, ps_seqs))
                    elif method == "Lee_and_Han_2019":
                        p = Process(target=self.leeandhan2019,
                                    args=(L, t_name, p_seq, score_function,
                                          top_k))
                    else:
                        # Defensive only: _check_methods() rejects this case.
                        raise ValueError(
                            "unsupported generative method {!r}".format(
                                method))

                    p.start()
                    processes.append(p)

                for p in processes:
                    p.join()

                for t_name, candidates in L:
                    self.qm.set_candidate_info(t_name, candidates)
                self.qm.update_and_reload()

    def apta_mcts(self, L, t_name, p_seq, score_function, bp, k, n_iter,
                  ps_names, ps_seqs):
        """Worker: sample candidates with Apta_MCTS and append them to ``L``.

        The specificity proteins are handed to ``sampling`` as the pair
        ``(ps_names, ps_seqs)``.  The result is appended to ``L`` as
        ``(t_name, candidates)``.
        """
        G = Apta_MCTS(score_function)
        p_spes = (ps_names, ps_seqs)
        candidate_aptamers = G.sampling(p_seq, bp, k, n_iter, p_spes)
        L.append((t_name, candidate_aptamers))

    def leeandhan2019(self, L, t_name, p_seq, score_function, k):
        """Worker: sample candidates with RandomHeuristicSampling.

        Uses the fixed parameters ``n_samples=6000000`` and ``bp=27``.  The
        result is appended to ``L`` as ``(t_name, candidates)``.
        """
        # fixed parameters
        n_samples, bp = 6000000, 27
        G = RandomHeuristicSampling(score_function)
        G.pre_sampling(n_samples, self.n_jobs, bp)
        candidate_aptamers = G.post_sampling(p_seq, k)
        L.append((t_name, candidate_aptamers))
Example #7
0
        qta_indexer.save_articles(path=output_path)
    else:
        qta_indexer.load_articles(path=output_path)
    qta_indexer.inverse_article_frequency()

    for title, article in qta_indexer.article_list.items():
        num_query = len(article.query_list)
        num_paragraph = len(article.paragraph_list)
        if num_paragraph >= 20:
            print('%-32s -> Queries: %3d, Paragraphs: %3d' %
                  (article.title, num_query, num_paragraph))

    # Baseline Query Search ############################################################################################

    query_manager = QueryManager(qta_indexer=qta_indexer,
                                 min_paragraph_number=20,
                                 top_document_number=10,
                                 keyword_number=50)
    save_search_engine(search_engine=query_manager.search_engine,
                       path=output_path)
    print('Corpus size :', len(query_manager.corpus))
    if initial_run:
        query_manager.search_queries()
        query_manager.save_queries(path=output_path)
    else:
        query_manager.load_queries(path=output_path)

    query_manager.clear_query_list(min_precision=0.2, min_recall=0.01)
    query_list = query_manager.query_list
    precisions = np.zeros(shape=(len(query_list), ))
    recalls = np.zeros(shape=(len(query_list), ))
    for i, query in enumerate(query_list):
Example #8
0
            return None
        return u

    @staticmethod
    def request_password_reset(email):
        """Trigger Parse's password reset process for ``email``.

        POSTs the address to the ``requestPasswordReset`` endpoint under
        API_ROOT.

        Returns:
            True if the request completed without raising, False if it
            raised an error.
        """
        url = '/'.join([API_ROOT, 'requestPasswordReset'])
        try:
            User.POST(url, email=email)
        except Exception:
            # Best-effort contract: report failure instead of raising.
            # KeyboardInterrupt and SystemExit are deliberately not caught.
            return False
        return True

    def _to_native(self):
        """Map each editable attribute name to its Parse-converted value.

        Values are converted with ``ParseType.convert_to_parse`` using
        ``as_pointer=True``.
        """
        return {
            key: ParseType.convert_to_parse(value, as_pointer=True)
            for key, value in self._editable_attrs.items()
        }

    def __repr__(self):
        """Return a short debug string with the username and object id."""
        identity = (self.username, self.objectId)
        return '<User:%s (Id %s)>' % identity


# Give User a class-level ``Query`` attribute: a QueryManager constructed
# with the User class itself.
User.Query = QueryManager(User)

class Role(datatypes.Object):
    """Object type stored in the ``_Role`` table, served under ``roles``."""

    parse_table = '_Role'
    ENDPOINT_ROOT = '/'.join((API_ROOT, 'roles'))

# Give Role a class-level ``Query`` attribute: a QueryManager constructed
# with the Role class itself.
Role.Query = QueryManager(Role)
Example #9
0
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.

from connection import API_ROOT
from datatypes import ParseResource
from query import QueryManager


class Installation(ParseResource):
    """Resource whose endpoint is ``installations`` under API_ROOT."""

    ENDPOINT_ROOT = '/'.join((API_ROOT, 'installations'))


class Push(ParseResource):
    """Resource whose endpoint is ``push`` under API_ROOT."""

    ENDPOINT_ROOT = '/'.join((API_ROOT, 'push'))

    @classmethod
    def _send(cls, data, where=None, **kw):
        """POST ``data`` to the push endpoint and return the POST result.

        ``where`` is forwarded as an extra keyword argument only when it is
        truthy; all other keyword arguments are passed through unchanged.
        """
        extra = dict(kw, where=where) if where else kw
        return cls.POST('', data=data, **extra)

    @classmethod
    def alert(cls, data, where=None, **kw):
        """Send ``data`` as the push payload; returns None."""
        cls._send(data, where=where, **kw)

    @classmethod
    def message(cls, message, where=None, **kw):
        """Send ``message`` wrapped as ``{'alert': message}``; returns None."""
        payload = {'alert': message}
        cls._send(payload, where=where, **kw)

# Give Installation a class-level ``Query`` attribute: a QueryManager
# constructed with the Installation class itself.
Installation.Query = QueryManager(Installation)