Example no. 1
    def absorb(repository, topic):
        # overarching repository creation
        os.mkdir(repository)
        move_files(topic, repository)
        mcdir(repository)
        file_list = generic_file_extraction(topic.files)
        for subtopic in topic.titles:
            for file in file_list:

                # read the file, then create and enter a directory named after it
                with open(file, 'r') as source:
                    f_text = source.read()
                mcdir(file.replace('.txt', ''))
                recorder = nlp(f_text)

                # record entities
                mcdir('Entities')
                cwd = os.getcwd()
                ent_list = list(recorder.ents)
                # inside the Entities repository
                for entity in ent_list:
                    # creating entity sub repositories based on label
                    mcdir(f'{entity.label_}')
                    mcdir(f'{entity}.txt')
                    # recording the entity in the sub repository
                    with open(f'{entity}.txt', 'w+') as f:
                        f.write(f'Entity Plain Text: {entity.text}\n')
                        f.write(f'Label: {entity.label_}\n')
                        # strip non-word characters before searching
                        pattern = re.compile(r'\W')
                        wp = search(re.sub(pattern, '', entity.text))
                        if wp is not None:
                            f.write(f'{wp.title} : {wp.url}\n')
                            f.write(f'{wikipedia.summary(wp.title, sentences=10)}')
                    os.chdir(cwd)
                chback(2)

                # record vocabulary
                mcdir('Vocabulary')
                for word in recorder:
                    # skip stop words and words that are already recorded
                    if not word.is_stop and not os.path.exists(f'{word}.txt'):
                        with open(f'{word}.txt', 'w+') as doc:
                            doc.write(f'{word}\n')
                            # coarse POS + explanation, then fine-grained POS explanation
                            doc.write(f'Part of Speech:{word.pos_:{6}} '
                                      f'{spacy.explain(word.pos_):{6}} '
                                      f'{spacy.explain(word.tag_)}\n')
                            # record the stem of the word
                            doc.write(f'Stem: {VDE.stemmer.stem(word.text)}')
                        # look up the word in the search engine as well
                        search(word.text)
                chback(1)

                chback(1)
                os.rename(f'{file}', f"{file.replace('.txt', '')}/{file}")

        os.chdir(repository)
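
A note on the helpers: mcdir and chback are project-specific and not defined in this snippet. A minimal sketch of what they plausibly do (create-and-enter a directory, and step back up the tree), assuming exactly those semantics:

    import os

    def mcdir(path):
        # Hypothetical helper: create the directory if missing, then enter it.
        os.makedirs(path, exist_ok=True)
        os.chdir(path)

    def chback(levels):
        # Hypothetical helper: move the working directory up `levels` parents.
        for _ in range(levels):
            os.chdir('..')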
Example no. 2
    def load_wiki(self, page):

        if not self.valid_wiki(page.url):
            return

        pwiki = '//div[@class="mw-parser-output"]/*'
        xwiki = engine.Path(pwiki, link=False)

        results = engine.search(page, [xwiki])

        if len(results) == 0:
            return

        rwiki = results[0]
        wiki = ""

        for child in rwiki:
            if self.header.match(child.tag):
                wiki += "\n"
                wiki += "-" * 80
                wiki += "\n"
                wiki += etree.tostring(child, method="text", encoding="UTF-8")
                wiki += "-" * 80
                wiki += "\n"
            if child.tag.decode("ascii", "ignore") == "p":
                wiki += etree.tostring(child, method="text", encoding="UTF-8")

        self.wiki = wiki
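
self.header is used above as a compiled pattern matched against element tags but is not defined in the snippet; a plausible sketch, assuming it is meant to match HTML heading tags:

    import re

    class WikiLoader:
        def __init__(self):
            # Hypothetical: treat <h1>..<h6> tags as section headers, matching
            # the self.header.match(child.tag) call in load_wiki above.
            self.header = re.compile(r"^h[1-6]$")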
Example no. 3
def worker(api, friends):

    timestamp_start = str(datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
    log_start = str((threading.currentThread().getName(), 'Launched'))
    start = log_start + ':' + timestamp_start
    print start

    for friend in friends:
        engine.index(INDEX, 'user', friend.id, social.userToJSON(friend))

        request={"size":1,"sort":[{"id":{"order":"desc"}}], "query": {"match": {
                 "user.screen_name":friend.screen_name}}}

        docs = engine.search(INDEX, 'tweet', request)
        if (len(docs["hits"]["hits"]) > 0):
            since_id = str(docs["hits"]["hits"][0][u'_id'])
        else:
            since_id = None

        tweets = social.GetTweets(api, friend.screen_name, since_id)

        for tweet in tweets:
            engine.index(INDEX, 'tweet', tweet.id, social.tweetToJSON(tweet))

    timestamp_end = str(datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
    log_end = str((threading.currentThread().getName(), 'Finishing'))
    end = log_end + ':' + timestamp_end
    print end
    return
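
The request dictionary is Elasticsearch query DSL, so engine.index and engine.search here read like thin wrappers over an Elasticsearch client; a minimal sketch under that assumption (pre-7.x elasticsearch-py, where doc_type is still accepted):

    from elasticsearch import Elasticsearch

    es = Elasticsearch()

    def index(index_name, doc_type, doc_id, body):
        # Hypothetical wrapper: store one document under an explicit id.
        return es.index(index=index_name, doc_type=doc_type, id=doc_id, body=body)

    def search(index_name, doc_type, body):
        # Hypothetical wrapper: run a query-DSL search, returning the raw
        # response so callers can read docs["hits"]["hits"] as above.
        return es.search(index=index_name, doc_type=doc_type, body=body)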
Example no. 4
    def load_answers(self, page):

        if not self.valid_url(page.url):
            return

        paccepted = '//div[@class="answer accepted-answer"]'
        panswers = '//div[@class="answer"]'
        panswer = '/table/tr/td[@class="answercell"]/div[@class="post-text"]'

        xaccepted = engine.Path(paccepted + panswer, link=False)
        xanswers = engine.Path(panswers + panswer, link=False)

        results = engine.search(page, [xaccepted, xanswers])

        self.results = None

        raccepted = results[0]
        ranswers = results[1]

        answers = []
        accepted = None

        if len(raccepted) > 0:
            accepted = etree.tostring(raccepted[0],
                                      method="text",
                                      encoding="UTF-8")
        for answer in ranswers:
            answers.append(
                etree.tostring(answer, method="text", encoding="UTF-8"))

        self.results = Answers("unimplemented", accepted, answers)
Example no. 5
    def search(self):
        results = engine.search(self.searchbox.text())
        self.clear()
        for p in results:
            self.listWidget.addItem(p)
            for a in engine.posts[p]:
                self.listWidget.addItem(a)
Example no. 6
def result():
    if request.method == 'POST':
        address = request.form['q']
        res = search(address)
        results = filter_results(res)
        return render_template("results.html", results=results)
    return render_template('index.html')
Example no. 7
    def search(self):
        '''Searches the database for relevant pages, then updates the results box
        to show any matching results
        '''
        # Clear the previous search results
        self.results_box.clear()

        # search index for documents/urls relevant to search_query
        global FILE_URL_MAP
        search_query = self.search_box.text()
        results, num_results = engine.search(search_query)
        url_list = []
        if len(results) == 0:
            return
        WEBPAGES_RAW_NAME = "WEBPAGES_RAW"
        JSON_FILE_NAME = os.path.join(".", WEBPAGES_RAW_NAME, "bookkeeping.json")
        with open(JSON_FILE_NAME, encoding="utf-8") as json_file:
            FILE_URL_MAP = json.load(json_file)
            
        cleaner = Cleaner()
        for filename in results:
            with open(os.path.join(".", WEBPAGES_RAW_NAME, filename),"rb") as fp:
                html_string = fp.read()
                cleaned_document = lxml.html.document_fromstring(cleaner.clean_html(html_string))
                #print((FILE_URL_MAP[filename],cleaned_document.text_content()))
                url_list.append((FILE_URL_MAP[filename],cleaned_document.text_content()))
        for i in url_list:
            print(i[0])
            
        #results = 5 * [(search_query, f"www.{search_query}.com", "example cont\nent")]
        self.show_results(search_query, url_list, num_results)
Example no. 8
def index():
    query = request.args.get("q")
    if query:
        ctx = engine.search(query)
        if ctx['redirect']:
            return redirect(ctx['redirect'])
        return render_template("results.html", **ctx)
    return render_template("index.html")
Example no. 9
    def load_results(self, page):

        xsection = engine.Path('//div[@class="section"]', link=False)
        results = engine.search(page, xsection)

        if len(results) != 1:
            return

        self.instruction = etree.tostring(results[0], method="text", encoding="UTF-8")
Example no. 10
def runtime_game_handler(nickname, player_class, world):   
    # Create a new player and get his UID.
    player_uid = world.add_player(nickname)
    print "Welcome to this wonderful game!"
    # Store the UID
    CLIENTS.append((player_uid, 'Local Player'))
    # Join the player's selected class
    sel_class = "JOIN;"+player_class
    world.set_player_attr(player_uid, 1, 'character', sel_class)
    #!TODO: This will, of course, have to be changed from raw_input to a button!
    playing = True
    while playing:
        message = raw_input('command: > ').upper()
        m_split = message.split(';')
        if m_split[0] == 'JOIN':
            # Join the game with the according character class
            world.set_player_attr(player_uid, 1, 'character', message)
        elif m_split[0] == 'PRINT':
            '''Server always sends back the players visibility
                area depending on player's class.
            '''  
            gamefield_arr = world.gamefield_scope(player_uid)
            print gamefield_arr
        elif m_split[0] == 'PRINTALL':    
            # Print out the whole gamefield. For debugging purposes.
            print ''
            print ''
            world.print_array() # For server side debugging
        elif message == 'U':
            data = 'UP'
            world.set_player_attr(player_uid, 1, 'nextmove', data)
        elif message == 'D':
            data = 'DOWN'
            world.set_player_attr(player_uid, 1, 'nextmove', data)
        elif message == 'L':
            data = 'LEFT'
            world.set_player_attr(player_uid, 1, 'nextmove', data)
        elif message == 'R':
            data = 'RIGHT'
            world.set_player_attr(player_uid, 1, 'nextmove', data)
        elif m_split[0] == 'SCORE':
            player = engine.search(world.players, 'uid', player_uid) # The player's dictionary
            score = player['score']
            print score
        elif m_split[0] == 'PLIST':
            msg = str(world.players)
            print msg
        elif message == 'QUIT':
            # Remove the player from the world
            world.del_player(player_uid)
            # Remove the client from the list.
            for client in CLIENTS:
                if client[0] == player_uid:
                    CLIENTS.remove(client)
                    break
            print "Client disconnected at: %s" % (':'.join(map(str, client))), time.asctime()
            playing = False
Example no. 11
    def load_results(self, page):

        xtitle = engine.Path('//h3[@class="r"]/a/text()', link=False)
        xlink = engine.Path('//h3[@class="r"]/a/@href')
        results = engine.search(page, [xtitle, xlink])

        self.titles = []
        self.links = []

        for t, l in zip(results[0], results[1]):
            self.titles.append(t)
            self.links.append(l)
Example no. 12
    def search_time_limit(self, amount_of_time=5):
        position = engine.start_state()
        for _ in range(20):
            position.push(list(position.legal_moves)[0])
        self.logger.info("Searching for %d seconds", amount_of_time)
        timestamp = time.time()
        _, _ = engine.search(position, seconds=amount_of_time)
        timestamp = time.time() - timestamp
        difference = timestamp - amount_of_time
        self.logger.info("Search took %.2f seconds", timestamp)
        self.logger.info("Difference is %.2f seconds", difference)
        self.assertGreaterEqual(.1, difference)  # allow .1s of passed time
Example no. 13
    def load_search(self, page):

        if not self.valid_search(page.url):
            return

        presult = '//div[@class="mw-search-result-heading"]/a'
        xresult = engine.Path(presult, link=False)

        results = engine.search(page, xresult)

        for result in results:
            print "UNIMPLEMENTED, ADD SUBTREE TO ENGINE"
            print result.attrib['href'], result.attrib['title']
Example no. 14
    def load_questions(self, page):

        if not self.valid_search(page.url):
            return

        psummary = '//div[@class="summary"]/div[@class="result-link"]'
        xsummary = engine.Path(psummary, link=False)

        plink = "./span/a/@href"
        ptitle = "./span/a/text()"

        ptext = "./span"

        xtitle = engine.Path(ptitle, link=False)
        xlink = engine.Path(plink, link=True)

        results = engine.search(page, xsummary)

        if len(results) == 0:
            writeln("[-] no results found")
            return

        if not isinstance(self.results, Questions):
            self.results = Questions("ERR", [])

        for summary in results:
            page = engine.Page(page.url, tree=summary)

            link = engine.search(page, xlink)
            title = engine.search(page, xtitle)

            if len(link) == 0 or len(title) == 0:
                continue

            link = link[0].strip()
            title = title[0].strip()[3:]

            self.results.questions.append(Question(title, link))
Example no. 15
    def do_next(self, args):

        if not self.page:
            writeln("[-] no results loaded!")
            return
        xpath = engine.Path('//a[@id="pnnext"]/@href')
        n = engine.search(self.page, xpath)

        if not n:
            writeln("[-] no more results!")
            return

        self.page = engine.Page(n[0], load=True)
        self.lpr(self.page)
Example no. 16
def home():
    query = request.args.get('query')
    fields = request.args.getlist('fields')
    genres = request.args.getlist('genres')
    page = request.args.get('page')
    
    if not page:
        page = 1
    
    if not query:
        return render_template('index.html')
    else:
        results = engine.search(query, fields, genres, int(page))
        
        return render_template('results.html', results=results)
Example no. 17
    def post(self):
        args = parser.parse_args()
        link = args['link']
        search_terms = args['search_terms']
        analysis_interval = args['analysis_interval']
        start_time = args['start_time']
        end_time = args['end_time']
        results = {}

        # results = search(link, search_terms, analysis_interval, start_time, end_time)

        try:
            results = search(link, search_terms, analysis_interval, start_time,
                             end_time)
        except Exception as e:
            return {'message': str(e)}, 422

        return results
Example no. 18
def worker(api, friends):

    timestamp_start = str(
        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
    log_start = str((threading.currentThread().getName(), 'Launched'))
    start = log_start + ':' + timestamp_start
    print start

    for friend in friends:
        engine.index(INDEX, 'user', friend.id, social.userToJSON(friend))

        request = {
            "size": 1,
            "sort": [{
                "id": {
                    "order": "desc"
                }
            }],
            "query": {
                "match": {
                    "user.screen_name": friend.screen_name
                }
            }
        }

        docs = engine.search(INDEX, 'tweet', request)
        if (len(docs["hits"]["hits"]) > 0):
            since_id = str(docs["hits"]["hits"][0][u'_id'])
        else:
            since_id = None

        tweets = social.GetTweets(api, friend.screen_name, since_id)

        for tweet in tweets:
            engine.index(INDEX, 'tweet', tweet.id, social.tweetToJSON(tweet))

    timestamp_end = str(
        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
    log_end = str((threading.currentThread().getName(), 'Finishing'))
    end = log_end + ':' + timestamp_end
    print end
    return
Example no. 19
def Runner(build_index, parse_query, search, determine_relevance):
    
    logging.log(GRADING, "")
    logging.log(GRADING, "-- STARTING TO RUN --")
    
    index = {}
    documentlist = ["us_constitution", "magna_carta", "un_charter"]
    
    build_index(index, documentlist)
    logging.log(GRADING, "INDEX")
    logging.log(GRADING, index)
    
    print "There are {0} documents in the index".format(len(documentlist))
    print "There are {0} terms in the index".format(len(index))
    logging.log(GRADING, "STATISTICS")
    logging.log(GRADING, "{0} docs, {1} terms".format(
        len(documentlist),
        len(index),
    ))
    
    user_query = raw_input("enter your query>")
    query = parse_query(user_query)
    logging.log(GRADING, "QUERY")
    logging.log(GRADING, query)
    
    candidate_set = search(query, index)
    logging.log(GRADING, "CANDIDATE_SET")
    logging.log(GRADING, candidate_set)
    
    relevant_results = determine_relevance(query, candidate_set)
    logging.log(GRADING, "RELEVANT_RESULTS")
    logging.log(GRADING, relevant_results)
    
    results = relevant_results.items()
    results.sort(key=lambda i: i[1], reverse=True)
    print 'The best results for "{0}" are:'.format(user_query)
    for result in results:
        print result
    print '[end of results]'

    logging.log(GRADING, "-- ENDED RUN CLEANLY --")
Example no. 20
def Main():

	form = FormContent()

	query_option = form["query_option"][0]			# Determine which option is desired

	city = "None"			# Code for 
	city_name = "None"
	if form.has_key("city"):
		city = form["city"][0]
		city_name = functs.convert_station(city)

	year = "None"
	if form.has_key("year"):
		year = form["year"][0]

	month = "None"
	str_month = "None"
	if form.has_key("month"):
		month = form["month"][0]
		str_month = functs.convert_month("0"+month)

	day = "None"
	if form.has_key("day"):
		day = form["day"][0]

	if year == "none" or year == "None":
		style.SendError("You need to specify a search date.")

	style.header("Historical Iowa Weather Data Search Engine","white")	# Standard Setup HTML Document
	style.std_top("Query Results in Iowa Weather Data")			# Standard Header Information

	print '<TABLE NOBORDER>'
	print '<TR><TH colspan="4">Search Parameters:</TH><TH colspan="6"><font color="red">Time Constraints:</font></TH></TR>'
	print '<TR><TH bgcolor="#EEEEEE">Query Option:</TH><TD>'+query_option+'</TD>'
	print '<TH bgcolor="#EEEEEE">Station Option:</TH><TD>'+city_name+'</TD>'
	print '<TH bgcolor="#EEEEEE"><font color="red">Year:</font></TH><TD>'+year+'</TD>'
	print '<TH bgcolor="#EEEEEE"><font color="red">Month:</font></TH><TD>'+str_month+'</TD>'
	print '<TH bgcolor="#EEEEEE"><font color="red">Day:</font></TH><TD>'+day+'</TD>'
	print '</TR></TABLE>'

	if city == "None":
		print '<H2 align="center"><font color="blue"><U>Please Enter a city!!</U></font></H2>'
		style.std_bot()
		sys.exit()

	results = engine.search(query_option, city, year, month, day)

	print '<HR>'

	junk_string = 'query_option='+query_option+'&city='+city+'&year='+year+'&month='+month+'&day='+day
	print '<a href="download.py?'+junk_string+'"><B>Click to download this data set</B></a>'

	print '<HR>'

	if len(results) == 0:
		print '<P>This Query did not find any results in the Database system.<BR>'
		print '<P>Please review your query above and try again.<BR>'

	else:
		print '<H2 align="center"><font color="blue"><U>Weather Data for '+city_name+', Iowa</U></font></H2>'
		table_header()
		for i in range(len(results)):
			city = results[i][0]
			day = results[i][1]
			climoweek = results[i][2]
			high = results[i][3]
			low = results[i][4]
			rain = results[i][5]
			snow = results[i][6]

			result_row(city, day, str(high), str(low), str(rain), str(snow))
		table_footer()	

	style.std_bot()
Example no. 21
    def search(self):
        key_iter = []

        search_kword_num = 0
        search_url_num = 0
        for key in self.keys_list:
            # write the results out as an HTML file

            search_kword_num = search_kword_num + 1
            if os.path.isdir(self.work_path + '/result'):
                pass
            else:
                os.mkdir(self.work_path + '/result')

            if self.breakpoint:
                filename = self.work_path + \
                    '/result/%s-%s.html' % ((sys.argv[1]
                                             ).decode('utf8'), key.decode('utf8'))
            else:
                filename = self.work_path + \
                    '/result/%s.html' % (key.decode('utf8'))

            # write the file
            with codecs.open(filename, 'wb', "utf-8") as f:
                f.write(unicode(
                    '<html><head><meta http-equiv="content-type" content="text/html;charset=utf-8"></head><body>'))
                self.br.set_cookiejar(cookielib.LWPCookieJar())

                for engine in self.engine_list:
                    engine_name = engine.engine.__class__.__name__

                    if self.breakpoint and self.num_runinfo.has_option(engine_name, key):
                        ri = RunInfo(
                            key, num=int(self.num_runinfo.get(engine_name, key)))
                    else:
                        ri = RunInfo(key)
                    key_iter.append([engine_name, engine.search(key), ri])

                while key_iter != []:
                    for k_iter in key_iter:
                        self.exist_list = []

                        try:
                            k_iter[2].page += 1
                            # the next() call here is essential
                            result_list = next(k_iter[1])

                        except StopIteration:
                            # print 'remove ',k_iter[0]
                            key_iter.remove(k_iter)
                            for k_iter in key_iter:
                                print "      ", k_iter[0]
                            continue

                        except Exception, e:
                            # print k_iter[0],'error'
                            # print e
                            self.error_list.append([e, k_iter[0], k_iter[2]])
                            key_iter.remove(k_iter)
                            continue

                        if result_list:
                            # print "============================"
                            # print "key:",key,'engine:',k_iter[0]
                            # print "***************************"
                            f.write(
                                ('<p>============================</p>\n<p>key:%s  engine:%s</p>\n<p>****************************</p>\n' % (key, k_iter[0])).decode('utf8'))
                            for result in result_list:
                                k_iter[2].num += 1
                                result['engine'] = k_iter[0]
                                result['num'] = k_iter[2].num
                                result['key'] = key
                                result['time_now'] = time.strftime(
                                    '%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
                                # print
                                # result['num'],result['title']#,result['url']
                                self.exist_list.append(result['url'])
                                f.write(unicode('<p>%s %s <a href="%s" target="_blank">%s</a></p>\n' % (
                                    result['time_now'], result['num'], result['url'], result['title'])))
                            self.num_runinfo.set(k_iter[0], key, k_iter[2].num)
                            # what are nif and rif?
                            with open(self.numinfo_file, 'wb') as nif:
                                self.num_runinfo.write(nif)
                            with open(self.linkinfo_file, 'wb') as rif:
                                self.link_runinfo.write(rif)
                            # print "============================"
                            f.write('<p>============================</p>\n')

                            # update the URL count
                            search_url_num = search_url_num + \
                                len(self.exist_list)
                            '''
                            store into the mongo database
                            '''
                            # print '///////////////////////'
                            # print  self.exist_list
                            # print '//////////////////////////'
                            try:
                                self.mongo_operate.add_gray_list(
                                    self.exist_list, self.objectID)
                            except Exception, e:
                                sys.stderr.write(
                                    '%s\n' % MongoError(e, 'meta_run add gray'))
                                sys.stderr.write(
                                    ' task_id: %s\n' % self.task_id)
                            # print
                            # '---------------------seccess###################################'

                            self.update_running_state(
                                search_url_num, search_kword_num)

                f.write('</body></html>')
Example no. 22
start = 'Start: ' + timestamp_start
print(start)

if (len(sys.argv)==2):
    api_param = sys.argv[1]
else:
    raise Exception('Error en cantidad de parametros ingresados!!!')

api = social.api(api_param)

for account in accounts:
    print('Indexing ' + account)
    user = social.GetUser(api,account)
    id = user.id
    engine.index(INDEX, 'user', id, social.userToJSON(user))

    request={"size":1,"sort":[{"id":{"order":"desc"}}], "query": {"match": {
                 "user.screen_name":account}}}

    docs = engine.search(INDEX, 'tweet', request)
    if (len(docs["hits"]["hits"]) > 0):
        since_id = str(docs["hits"]["hits"][0][u'_id'])
    else:
        since_id = None

    tweets = social.GetTweets(api, account, since_id)

    for tweet in tweets:
        print('Tweet>  ' + tweet.text)
        engine.index(INDEX, 'tweet', tweet.id, social.tweetToJSON(tweet))
Example no. 23
    def test_search_at_mate(self):
        position = mated_position()
        move_suggestion, resulting_position = engine.search(position, 2)
        self.assertEqual(position.fen(), resulting_position.fen())
        self.assertIsNone(move_suggestion)
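
mated_position() is a fixture defined elsewhere; the position.fen() and position.push() calls in these tests suggest a python-chess board, so a minimal sketch under that assumption:

    import chess

    def mated_position():
        # Hypothetical fixture: a board that is already checkmate (fool's
        # mate), so the engine has no legal move to suggest.
        board = chess.Board()
        for move in ("f3", "e5", "g4", "Qh4"):
            board.push_san(move)
        return board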
Example no. 24
    def search(self):
        key_iter = []

        search_kword_num = 0
        search_url_num = 0
        for key in self.keys_list:
            # write the results out as an HTML file

            search_kword_num = search_kword_num + 1
            if os.path.isdir(self.work_path + '/result'):
                pass
            else:
                os.mkdir(self.work_path + '/result')

            if self.breakpoint:
                filename = self.work_path + \
                    '/result/%s-%s.html' % ((sys.argv[1]
                                             ).decode('utf8'), key.decode('utf8'))
            else:
                filename = self.work_path + \
                    '/result/%s.html' % (key.decode('utf8'))

            # write the file
            with codecs.open(filename, 'wb', "utf-8") as f:
                f.write(
                    unicode(
                        '<html><head><meta http-equiv="content-type" content="text/html;charset=utf-8"></head><body>'
                    ))
                self.br.set_cookiejar(cookielib.LWPCookieJar())

                for engine in self.engine_list:
                    engine_name = engine.engine.__class__.__name__

                    if self.breakpoint and self.num_runinfo.has_option(
                            engine_name, key):
                        ri = RunInfo(
                            key,
                            num=int(self.num_runinfo.get(engine_name, key)))
                    else:
                        ri = RunInfo(key)
                    key_iter.append([engine_name, engine.search(key), ri])

                while key_iter != []:
                    for k_iter in key_iter:
                        self.exist_list = []

                        try:
                            k_iter[2].page += 1
                            # the next() call here is essential
                            result_list = next(k_iter[1])

                        except StopIteration:
                            # print 'remove ',k_iter[0]
                            key_iter.remove(k_iter)
                            for k_iter in key_iter:
                                print "      ", k_iter[0]
                            continue

                        except Exception, e:
                            # print k_iter[0],'error'
                            # print e
                            self.error_list.append([e, k_iter[0], k_iter[2]])
                            key_iter.remove(k_iter)
                            continue

                        if result_list:
                            # print "============================"
                            # print "key:",key,'engine:',k_iter[0]
                            # print "***************************"
                            f.write((
                                '<p>============================</p>\n<p>key:%s  engine:%s</p>\n<p>****************************</p>\n'
                                % (key, k_iter[0])).decode('utf8'))
                            for result in result_list:
                                k_iter[2].num += 1
                                result['engine'] = k_iter[0]
                                result['num'] = k_iter[2].num
                                result['key'] = key
                                result['time_now'] = time.strftime(
                                    '%Y-%m-%d %H:%M:%S',
                                    time.localtime(time.time()))
                                # print
                                # result['num'],result['title']#,result['url']
                                self.exist_list.append(result['url'])
                                f.write(
                                    unicode(
                                        '<p>%s %s <a href="%s" target="_blank">%s</a></p>\n'
                                        % (result['time_now'], result['num'],
                                           result['url'], result['title'])))
                            self.num_runinfo.set(k_iter[0], key, k_iter[2].num)
                            # what are nif and rif?
                            with open(self.numinfo_file, 'wb') as nif:
                                self.num_runinfo.write(nif)
                            with open(self.linkinfo_file, 'wb') as rif:
                                self.link_runinfo.write(rif)
                            # print "============================"
                            f.write('<p>============================</p>\n')

                            # update the URL count
                            search_url_num = search_url_num + \
                                len(self.exist_list)
                            '''
                            store into the mongo database
                            '''
                            # print '///////////////////////'
                            # print  self.exist_list
                            # print '//////////////////////////'
                            try:
                                self.mongo_operate.add_gray_list(
                                    self.exist_list, self.objectID)
                            except Exception, e:
                                sys.stderr.write(
                                    '%s\n' %
                                    MongoError(e, 'meta_run add gray'))
                                sys.stderr.write(' task_id: %s\n' %
                                                 self.task_id)
                            # print
                            # '---------------------seccess###################################'

                            self.update_running_state(search_url_num,
                                                      search_kword_num)

                f.write('</body></html>')
Example no. 25
start = 'Start: ' + timestamp_start
print start

if (len(sys.argv)==2):
    api_param = sys.argv[1]
else:
    raise Exception('Error en cantidad de parametros ingresados!!!')

api = social.api(api_param)

for account in accounts:
    print 'Indexing ' + account
    user = social.GetUser(api,account)
    id = user.__getattribute__('_id')
    engine.index(INDEX, 'user', id, social.userToJSON(user))

    request={"size":1,"sort":[{"id":{"order":"desc"}}], "query": {"match": {
                 "user.screen_name":account}}}

    docs = engine.search(INDEX, 'tweet', request)
    if (len(docs["hits"]["hits"]) > 0):
        since_id = str(docs["hits"]["hits"][0][u'_id'])
    else:
        since_id = None

    tweets = social.GetTweets(api, account, since_id)

    for tweet in tweets:
        print 'Tweet>  ' + tweet.text
        engine.index(INDEX, 'tweet', tweet.id, social.tweetToJSON(tweet))
Example no. 26
def search_command():
    list1.delete(0, END)
    for row in engine.search(title_text.get(), author_text.get(), year_text.get(), isbn_text.get()):
        list1.insert(END, row)
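
search_command assumes list1 and the four *_text StringVars exist in the enclosing GUI module; a minimal sketch of that Tkinter wiring (Python 3 names; engine.search is the book-search function used above):

    from tkinter import Tk, Entry, Listbox, StringVar, END

    root = Tk()

    # Hypothetical widgets matching the names used in search_command above.
    title_text, author_text, year_text, isbn_text = (StringVar() for _ in range(4))
    for var in (title_text, author_text, year_text, isbn_text):
        Entry(root, textvariable=var).pack()

    list1 = Listbox(root)
    list1.pack()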
Example no. 27
def Main():
    style.header("Download Dataset from IowaWx Archive", "white")
    print "<H2>Instructions for downloading from the PALS server</H2>"

    query_option = functs.get_content("query_option")
    city = functs.get_content("city")
    year = functs.get_content("year")
    month = functs.get_content("month")
    day = functs.get_content("day")

    if month == "None":
        str_month = "None"
    else:
        str_month = functs.convert_month("0" + month)

    if city == "None":
        str_city = "None"
    else:
        str_city = functs.convert_station(city)

    print "<HR><H3>1. Review Search Parameters....</H3>"
    print "<TABLE NOBORDER>"
    print '<TR><TH colspan="4">Search Parameters:</TH><TH colspan="6"><font color="red">Time Constraints:</font></TH></TR>'
    print '<TR><TH bgcolor="#EEEEEE">Query Option:</TH><TD>' + query_option + "</TD>"
    print '<TH bgcolor="#EEEEEE">Station Option:</TH><TD>' + str_city + "</TD>"
    print '<TH bgcolor="#EEEEEE"><font color="red">Year:</font></TH><TD>' + year + "</TD>"
    print '<TH bgcolor="#EEEEEE"><font color="red">Month:</font></TH><TD>' + str_month + "</TD>"
    print '<TH bgcolor="#EEEEEE"><font color="red">Day:</font></TH><TD>' + day + "</TD>"
    print "</TR></TABLE>"
    print "<HR>"

    print "<H3>2. Instructions for downloading this data.</H3>"
    print "Below a link with appear and you need to hold the shift key down and click on the link.<BR>"
    print "This should allow you to save the text file locally, so then you can do what ever you want with it.<BR>"
    print "<HR>"

    url = "/archivewx/iowawx/tmp/" + filename + ".txt"

    print "<H3>3. Creating data file... (May take a few seconds.)</H3>"

    results = engine.search(query_option, city, year, month, day)

    for i in range(len(results)):
        city = results[i][0]
        year = results[i][1]
        month = results[i][2]
        day = results[i][3]
        climoweek = results[i][4]
        high = results[i][5]
        low = results[i][6]
        rain = results[i][7]
        snow = results[i][8]
        file.write(city + "\t" + year + "\t" + month + "\t" + day + "\t" + climoweek + "\t" + high + "\t")
        file.write(low + "\t" + rain + "\t" + snow + "\n")
    file.close()

    print "<BR>File created successfully!! <BR><HR>"

    print "<H3>4. Download file</H3>"
    print '<a href="' + url + '">Shift-Click Here, to download file</a><BR>'

    style.std_bot()
Example no. 28
File: bing.py Project: krornus/misc
    def search_page(self):

        return [str(x[0][0]) for x in engine.search(self.page,self.path)]