Beispiel #1
0
def run_search(dict_file, postings_file, queries_file, results_file):
    """
    Run every query in ``queries_file`` and write one result line per query.

    The dictionary is unpickled from ``dict_file`` and handed, together with
    ``postings_file``, to ``search`` for each non-empty query line. An empty
    query line yields an empty result line. Results are separated by a single
    newline; no newline is written after the last result.

    :param dict_file: path of the pickled dictionary
    :param postings_file: path of the postings file, passed through to ``search``
    :param queries_file: path of the UTF-8 text file holding one query per line
    :param results_file: path of the UTF-8 text file the results are written to
    """
    print('running search on the queries...')

    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file; only feed it dictionary files produced by a trusted indexer.
    with open(dict_file, 'rb') as dict_handle:
        new_dict = pickle.load(dict_handle)

    with open(queries_file, 'r', encoding="utf8") as in_file:
        query_list = in_file.read().splitlines()

    # Context managers guarantee the handles are closed even if search() raises.
    with open(results_file, 'w', encoding="utf8") as out_file:
        for position, query in enumerate(query_list):
            # Separator goes *between* results, never after the last one.
            if position:
                out_file.write('\n')
            if query:
                out_file.write(search(query, new_dict, postings_file))
def get_search_results(**kw):
    """Build a catalog query from the keyword arguments and execute it.

    All keyword arguments are forwarded unchanged to ``make_query``; the
    resulting query is handed to ``search`` and its result is returned as is.
    """
    return search(make_query(**kw))
Beispiel #3
0
def get_search_results(**kw):
    """Search the catalog with a query built from ``kw``.

    The keyword arguments (which may carry additional query parameters) are
    passed to ``make_query``; whatever ``search`` returns for that query is
    returned to the caller.
    """
    catalog_query = make_query(**kw)
    results = search(catalog_query)
    return results
Beispiel #4
0
def precision_at_k(k,
                   num_relevant,
                   query,
                   search_type,
                   operation_type,
                   input=False,
                   incorporate_pr="yes"):
    '''
    Compute precision at ``k`` for a single query.

    The documents returned by ``search`` (asked for ``k`` results) are compared
    against the reference set from ``util.get_nbest_results(query, num_relevant)``.

    :param k: number of results requested from ``search``; also the divisor
    :param num_relevant: size of the reference set requested from ``util``
    :param query: query forwarded to ``search`` and ``util.get_nbest_results``
    :param search_type: forwarded to ``search``
    :param operation_type: forwarded to ``search``
    :param input: forwarded to ``search`` as ``type_input``
    :param incorporate_pr: forwarded to ``search``
    :return: number of retrieved documents found in the reference set,
             divided by ``k``
    '''
    search_options = {
        "type_input": input,
        "incorporate_pr": incorporate_pr,
        "verbose": False,
        "num_results": k,
    }
    hits = search(query, search_type, operation_type, **search_options)

    # reference set the hits are judged against
    gold = util.get_nbest_results(query, num_relevant)
    print("{} relevant documents obtained".format(len(gold)))

    # count the hits that also appear in the reference set
    matched = sum(1 for doc in hits if doc in gold)
    print("{} relevant documents retrieved".format(matched))

    return matched / k
Beispiel #5
0
def search():
    """Run a document search for the authenticated user and return JSON.

    Reads ``project_id`` and ``q`` from the submitted form data.

    Returns:
        A JSON response with ``status="auth"`` when the session is not
        authenticated; otherwise ``status="success"`` together with the query
        string ``q`` and the summarised search results under ``documents``.

    Raises:
        query.QueryException: if ``project_id`` is missing from the form.
    """
    if not authenticator.is_auth(flask.session):
        return flask.jsonify(status="auth", message="User is not authenticated")

    form = flask.request.form
    # Use .get() so a missing project_id actually reaches the explicit check:
    # indexing the form raises on a missing key, which made the original
    # ``is None`` test unreachable.
    project_id = form.get('project_id')
    if project_id is None:
        raise query.QueryException("Required parameter project_id not provided")
    q = form['q']

    database = db()
    user_id = authenticator.user_id(flask.session)
    documents = query.summary(query.search(database, user_id, project_id, q))
    return flask.jsonify(status="success", q=q, documents=documents)
Beispiel #6
0
    def post(self):
        """Handle a submitted query and render the ranked result list.

        Reads the ``message`` body argument, passes it to ``search`` and
        renders ``listTemplate.html`` with the outcome.
        """
        # The raw query string; parsing and indexing are left to search().
        message = self.get_body_argument("message")

        # Obtain the ranked list that is shown on the web page.
        ranked = search(message)

        self.set_header("Content-Type", "text/plain")
        # The list has to be handed to the template under the name ``results``.
        self.render('listTemplate.html', title="Results", results=ranked)
Beispiel #7
0
def get_search_results(**kw):
    """Return the portal itself or the catalog results for ``kw``.

    :returns: ``[get_portal()]`` when the request targets the Plone site,
        otherwise the result of ``search`` for the query built from ``kw``
    :rtype: list/Products.ZCatalog.Lazy.LazyMap
    """
    # The Plone site is matched by portal type, id or uid.
    targets_portal = (
        kw.get("portal_type") == "Plone Site"
        or kw.get("id") in PORTAL_IDS
        or kw.get("uid") in PORTAL_IDS
    )
    if targets_portal:
        return [get_portal()]

    # Everything else goes through a regular catalog query.
    return search(make_query(**kw))
Beispiel #8
0
def get_search_results(**kw):
    """Search the catalog, short-circuiting requests for the Plone site.

    :returns: a one-element list holding the portal when ``kw`` asks for the
        Plone site, otherwise the catalog search results
    :rtype: list/Products.ZCatalog.Lazy.LazyMap
    """
    # The Plone site can be requested by its portal type ...
    if kw.get("portal_type") == "Plone Site":
        return [get_portal()]

    # ... or by one of the known portal identifiers, given as id or uid.
    for field in ("id", "uid"):
        if kw.get(field) in PORTAL_IDS:
            return [get_portal()]

    # Otherwise build and execute a catalog query.
    catalog_query = make_query(**kw)
    return search(catalog_query)
Beispiel #9
0
def handler(event, context):
    """Dispatch one of three S3-backed services selected by ``event["service"]``.

    * ``1`` - download ``event["filename"]`` from the bucket, run
      ``trans.process`` on it and upload the result as ``transform.csv``.
    * ``2`` - download ``transform.csv``, build a database from it with
      ``load.database_init`` and upload it as ``target.db``.
    * ``3`` - make sure ``/tmp/target.db`` exists (downloading
      ``event["filename"]`` if not) and run ``query.search`` with the
      ``where`` / ``group`` statements from the event.

    :param event: dict with ``service``, ``bucketname``, ``filename`` and, for
        service 3, ``where`` and ``group``
    :param context: unused
    :return: the value of ``inspector.finish()``
    :raises NameError: if ``service`` is not 1, 2 or 3
    """
    s3 = boto3.client('s3')
    inspector = Inspector()
    inspector.inspectAll()
    inspector.addTimeStamp("FrameWorkRuntime")

    service = event.get("service")
    bucket = event.get("bucketname")
    key = event.get("filename")

    if service == 1:
        filename = '/tmp/target.csv'
        processed_file = '/tmp/processed.csv'
        upload_key = 'transform.csv'
        s3.download_file(bucket, key, filename)
        processed_data = trans.process(filename)
        processed_data.to_csv(processed_file, index=False)
        s3.upload_file(processed_file, bucket, upload_key)
        inspector.addAttribute("numLine", processed_data.shape[0])
        inspector.addAttribute("outputFile", upload_key)
    elif service == 2:
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        data_path = '/tmp/target.csv'
        db_path = '/tmp/' + key.split('.')[0] + '.db'

        # Fetch the CSV written by service 1. The previous code *assigned* the
        # string to ``s3.download_file`` instead of calling it, so nothing was
        # downloaded and the client method was overwritten.
        s3.download_file(bucket, 'transform.csv', data_path)
        load.database_init(data_path, db_path, logger)
        s3.upload_file(db_path, bucket, 'target.db')
        inspector.addAttribute("DatabaseName", "target.db")
    elif service == 3:
        where_statement = event.get("where")
        group_statement = event.get("group")
        db_path = '/tmp/target.db'
        # Reuse a database file already present in /tmp; download otherwise.
        if not os.path.isfile(db_path):
            s3.download_file(bucket, key, db_path)
        result, lines = query.search(db_path, where_statement, group_statement)
        inspector.addAttribute("data", result)
        inspector.addAttribute("numLine", lines)
    else:
        raise NameError("There is no such service")

    inspector.inspectCPUDelta()
    return inspector.finish()
Beispiel #10
0
def search():
    """Render one page of the advertisements matching the request filters.

    Query-string parameters:
        region, min, max: required; forwarded to ``query.search``.
        page: optional 1-based page number (default 1). Values below 1 are
            treated as page 1.

    Returns:
        The rendered ``search.html`` template with the current page of
        advertisements, their images, the page count and the filters echoed
        back.
    """
    region = request.args['region']
    # Local names avoid shadowing the builtins ``max`` / ``min``; the template
    # still receives them under the original keyword names.
    upper_bound = request.args['max']
    lower_bound = request.args['min']
    result = query.search(region, lower_bound, upper_bound)

    # Clamp to the first page: a zero or negative page number would otherwise
    # become a negative slice index and serve items from the end of the list.
    current_page = max(request.args.get('page', 1, type=int), 1)
    items_per_page = 5
    # Integer ceiling division: number of pages needed to show every result.
    pages = -(-len(result) // items_per_page)
    first_item = (current_page - 1) * items_per_page
    list_part = result[first_item:first_item + items_per_page]

    # The first field of each advertisement is what get_images() is keyed on.
    images = [query.get_images(adv[0]) for adv in list_part]

    return render_template('search.html',
                           images=images,
                           advertisements=list_part,
                           pages=pages,
                           current_page=current_page,
                           region=region,
                           max=upper_bound,
                           min=lower_bound)
def submit(request):
    """Search for the posted ``info`` value and render the home page.

    The value returned by ``search`` is exposed to the
    ``personal/home.html`` template as ``search_string``.
    """
    template_context = {'search_string': search(request.POST.get('info'))}
    return render(request, 'personal/home.html', template_context)
Beispiel #12
0
def ndcg_at_k(k,
              num_relevant,
              query,
              search_type,
              operation_type,
              input=False,
              incorporate_pr="yes"):
    '''
    Compute the normalized discounted cumulative gain (nDCG) at ``k``.

    Graded relevance comes from ``util.get_nbest_results``: its first 10
    documents count as highly relevant (gain 2), the remaining ones as
    relevant (gain 1) and every other document as irrelevant (gain 0).
    The gain of the document at rank 1 is not discounted; from rank 2 on it
    is divided by ``log2(rank)``.

    :param k: number of results requested from ``search``
    :param num_relevant: size of the reference list requested from ``util``
    :param query: query forwarded to ``search`` and ``util.get_nbest_results``
    :param search_type: forwarded to ``search``
    :param operation_type: forwarded to ``search``
    :param input: forwarded to ``search`` as ``type_input``
    :param incorporate_pr: forwarded to ``search``
    :return: normalized DCG value at k; ``0.0`` when the optimal DCG is zero
    '''

    retrieved = search(query,
                       search_type,
                       operation_type,
                       type_input=input,
                       incorporate_pr=incorporate_pr,
                       verbose=False,
                       num_results=k)

    relevant = util.get_nbest_results(query, num_relevant)
    print("{} relevant documents obtained".format(len(relevant)))

    # The cut-off of 10 highly relevant documents is a tunable assumption.
    # Clamping keeps the remaining "relevant" count from going negative when
    # fewer than 10 reference documents were requested.
    num_highly_relevant = min(10, num_relevant)
    num_relevant = num_relevant - num_highly_relevant

    highly_relevant = relevant[:num_highly_relevant]
    relevant = relevant[num_highly_relevant:]

    dcg = 0
    # enumerate() supplies the rank; the previous loop never advanced its
    # rank counter, so every document was given the rank-1 discount.
    for rank, doc in enumerate(retrieved, start=1):
        # find relevance of retrieved doc
        if doc in highly_relevant:
            rel_doc = 2
        elif doc in relevant:
            rel_doc = 1
        else:
            rel_doc = 0

        # log2(1) == 0, so rank 1 is special-cased to a discount of 1
        discount = 1 if rank == 1 else log(rank, 2)

        # add discounted gain to DCG value
        dcg += rel_doc / discount

    # the result list may be shorter than k, so use its actual length
    num_retrieved = len(retrieved)
    opt_dcg = calc_optimal_dcg(num_retrieved, num_highly_relevant,
                               num_relevant)

    # Nothing retrieved / nothing relevant: avoid dividing by zero.
    if not opt_dcg:
        return 0.0

    # divide by optimal value to get normalized DCG
    return dcg / opt_dcg


#print(precision_at_k(k=10,num_relevant=200,query="UCL",search_type="tfidf",operation_type="and"))
 def searchUsers(self, pattern):
     """Run the statement from ``q.search(pattern)`` and return header plus rows.

     The first element of the returned list holds the column names taken
     from the cursor description; the fetched rows follow.
     """
     cursor = self.cur
     cursor.execute(q.search(pattern))
     header = [column[0] for column in cursor.description]
     return [header] + list(cursor.fetchall())
Beispiel #14
0
def main():
    """Command-line entry point: index files or search the index.

    Options (all optional, handled in this order):
        -f/--filename           index a single file
        -d/--dirname            index every file below a directory
        -st/--searchtags        prompt for tags and search on them
        -sd/--searchdescription prompt for a description and search on it
        -sn/--searchname        prompt for a file name and search on it

    Tags and description for indexing come from ``checkValidity``; if it
    returns ``(None, None)`` the function returns without indexing.
    Pressing Ctrl-C at a search prompt ends the function.
    """
    parser = argparse.ArgumentParser()

    # Only optional arguments are supported for now; sub-commands could be
    # added later via subparsers.
    parser.add_argument("-f", "--filename", help="you want to index a new file.")
    parser.add_argument("-d", "--dirname", help="recursively add a tag and description to each file in directory.")
    parser.add_argument("-st", "--searchtags", help="search for a file based on tags", action="store_true")
    parser.add_argument("-sd", "--searchdescription", help="search for a file based on its description", action="store_true")
    parser.add_argument("-sn", "--searchname", help="search for a file based on its name", action="store_true")
    args = parser.parse_args()

    # if a file name is provided then index this file
    if args.filename:
        name = args.filename
        tags, desc = checkValidity(name, 'file')

        if (tags, desc) == (None, None):
            return

        full_path = abspath(name)
        index({
            'filename': full_path,
            # Previously the name ``path``, which this function never
            # assigns; the absolute path is the location of the file.
            'filepath': full_path,
            'tags': tags,
            'description': desc,
        })

    # if a directory is provided then recursively walk the directory
    if args.dirname:
        dirname = args.dirname
        tags, desc = checkValidity(dirname, 'dir')

        if (tags, desc) == (None, None):
            return

        for root, _dirs, files in walk(abspath(dirname)):
            for name in files:
                file_path = root + '/' + name
                # Single-string print: same output as the old print statement
                # (which inserted a space after the label).
                print('Now indexing file:  ' + file_path)
                index({
                    'filename': name,
                    'filepath': file_path,
                    'tags': tags,
                    'description': desc,
                })

    # Each requested search prompts for its text and queries one index field.
    search_modes = (
        (args.searchtags, 'tags to be searched [space separated]: ', 'tags:'),
        (args.searchdescription, 'rough description of file: ', 'description:'),
        (args.searchname, 'filename to be searched: ', 'filename:'),
    )
    for requested, prompt, field_prefix in search_modes:
        if not requested:
            continue
        try:
            text = raw_input(prompt)
        except KeyboardInterrupt:
            print('\nExiting Gracefully...')
            return

        search(field_prefix + text)
Beispiel #15
0
    elif op == 'exit':
        exit()
    elif op == 'add':
        name = input('enter name: ')
        phone = input('enter phone: ')
        query.add(db, table_name, name=name, phone=phone)
    elif op == 'delete':
        name = input('enter name: ')
        query.delete(db, table_name, name)
    elif op == 'view':
        query.select(db, table_name)
    elif op == 'edit':
        var = input('update name or phone: ')
        if var == 'name':
            phone = input('enter the phone no of which name to be updated: ')
            new_name = input('enter the new name: ')
            query.update(db, table_name, phone=phone, where_name=new_name)
        if var == 'phone':
            name = input('enter the name whose number to be updated: ')
            new_phone = input('enter the new phone no: ')
            query.update(db, table_name, name=name, where_phone=new_phone)
    elif op == 'search':
        att = input('search by name or phone: ')
        if att == 'name':
            name = input('enter the name: ')
            query.search(db, table_name, name=name)
        if att == 'phone':
            num = input('enter the phone no: ')
            query.search(db, table_name, phone=num)

Beispiel #16
0
                revs = [x for x in revs]
                for rev in revs:
                    texts.append(rev.getText().encode('utf-8'))
                # cs = lcp.common_substring(texts, len(texts) * 0.9, 20)
                # print cs
                # missed = 0
                for ((revPrev,timePrev), (revNext,timeNext)) in gen_prev_next([(x, x.getTimestamp()) for x in revs], (None, int(time.time()))):

                    query['q'] = ' '.join(['"%s"' % x for x in escape_variables(revPrev.getText())])
                    if options.namespace:
                        query['q'] += ' namespace:%s' % options.namespace
                    timePrev = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(timePrev))
                    timeNext = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(timeNext))
                    query['q'] += ' timestamp:[%s TO %s]' % (timePrev, timeNext)
                    print >>sys.stderr, query
                    res = query_func.search('localhost', 8080, query)
                    print >>sys.stderr, res['hits_all']
                    inject_result(result, res)
                    #print revPrev.getText()
                    #print revPrev.getId(), timePrev, timeNext - timePrev, revPrev.getText()[:20]
                #     if revPrev.getText().find(cs.decode('utf-8')) < 0:
                #         print 'missed' , timeNext - timePrev, revPrev.getText()#, revPrev.getText().find(cs)
                #         missed += timeNext - timePrev
                #     else:
                #         print 'ok'
                # print missed

        except Exception as e:
            traceback.print_exc(file=sys.stderr)
        # restore the structure of result
        if options.revisions:
Beispiel #17
0
from query import search
from time import time as t

def _load_title_map(mapping_path):
    """Read ``mapping_path`` and return a ``{doc_id: title}`` dictionary.

    Each non-blank line starts with an integer document id; the remaining
    whitespace-separated words form the title. Lines without a title get a
    placeholder naming the id. Blank lines are skipped.
    """
    titles = {}
    with open(mapping_path, encoding="utf8") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # blank line: no id to index
                continue
            doc_id = fields[0]
            # Joining an empty slice never raises, so the old try/except
            # fallback could not trigger; test for the empty title instead.
            title = " ".join(fields[1:])
            if not title:
                title = "id:" + doc_id + ". Filename not found"
            titles[int(doc_id)] = title
    return titles


title_map = _load_title_map("mapping.txt")

if __name__ == "__main__":
    print("Enter query to search and type '.' to end the loop")
    while True:
        string = input("Enter your query : ")
        if string.strip() == ".":
            break
        start = t()
        doc_list = search(string)
        if not doc_list:
            print("No results found")
        else:
            # Only ids present in the mapping file can be shown by title.
            for k in doc_list:
                if k in title_map:
                    print(title_map[k])
        print("Time taken:", t() - start)
Beispiel #18
0
def api_search(query, banyak):
    """Call ``search`` with ``query`` and ``banyak`` converted to ``int``.

    NOTE(review): ``banyak`` presumably is the number of results wanted -
    confirm against ``search``.
    """
    count = int(banyak)
    return search(query, count)
Beispiel #19
0
def search():
    """Run ``query.search`` with the name and paging values of the request.

    Reads ``name`` (default ``''``), ``pageNum`` (default ``'1'``) and
    ``pageSize`` (default ``'10'``) from ``request.values``; the two paging
    values are converted to ``int`` before the call.
    """
    params = request.values
    return query.search(
        params.get('name', ''),
        int(params.get('pageNum', '1')),
        int(params.get('pageSize', '10')),
    )