Example no. 1
import requests
from bs4 import BeautifulSoup


def get_right_url():
    # "ss" is the project's search/scraping helper module (its import is not shown in this snippet).
    list_site = ss.search_google("Hébergement Web tunisie")
    list_name = ss.get_name(list_site)
    datas = ss.get_data(list_site, list_name)
    list_of_urls = []
    for xx in datas:
        try:
            getpage = requests.get(xx["url"])
            getpage_soup = BeautifulSoup(getpage.text, 'html.parser')
            for link in getpage_soup.find_all('a'):
                href = link.get('href')
                # some <a> tags have no href attribute, so guard against None
                if href and "domain" in href:
                    doc = {
                        "name_of_url": xx["name"],
                        "the_right_url": href
                    }
                    if not check_data_duplicate(list_of_urls, doc):
                        list_of_urls.append(doc)
                        print("-" * 60 + xx["name"] + "-" * 60)
        except Exception:
            # skip pages that cannot be fetched or parsed
            pass
    return list_of_urls
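The function above relies on a check_data_duplicate helper that is not shown on this page. A minimal sketch of what it could look like, assuming a duplicate simply means the same target URL was already collected:

def check_data_duplicate(list_of_urls, doc):
    # hypothetical helper: report a duplicate when the same target URL
    # has already been appended to the result list
    return any(d["the_right_url"] == doc["the_right_url"] for d in list_of_urls)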
Example no. 2
async def search(context):
    # Discord-style command handler: everything between the command tokens and
    # the final numeric argument is treated as the query, and the last token is
    # parsed as the number of results to fetch. search_google is imported
    # elsewhere and is expected to return a list of result links.
    try:
        args = str(context.message.content).split(' ')
        query = ' '.join(args[2:-1])
        results = search_google(query, int(args[-1]))
        for link in results:
            await context.send(link)
    except Exception as e:
        print(e)
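Because the coroutine reads context.message.content and replies with context.send, it fits a discord.py command handler. A minimal sketch of how it could be registered, assuming discord.py 2.x; the prefix, command name, and token are placeholders, not taken from the original:

import discord
from discord.ext import commands

intents = discord.Intents.default()
intents.message_content = True  # required in discord.py 2.x to read message text
bot = commands.Bot(command_prefix='!', intents=intents)

@bot.command(name='search')
async def search(context):
    ...  # body as in the example above

# bot.run("YOUR_TOKEN")  # supply your own bot token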
Example no. 3
def search_request(search, reddit, twitter, news):
    # The reddit/twitter/news flags arrive as the strings "true"/"false"
    # (e.g. from request parameters), hence the string comparisons below.
    coms = ['n']
    urls = []
    # urls = ["https://www.reddit.com/r/SFGiants/","https://www.reddit.com/r/Politics/"] <-- good way to test if out of searches
    if twitter == "true":
        coms += s.search_twitter(search)
    if reddit == "true":
        urls = s.search_google(search)
        coms += s.search_reddit(urls)
    if news == "true":
        coms += s.search_all_news(search)

    avg_sentiment, sample = s.analyze_text(coms, search)

    comment_length = len(coms)
    sites_searched = [reddit, twitter, news]
    word_count = s.word_count(coms)

    search_db_entry = UserSearch(
        search=search,
        urls=urls,
        avg_sentiment=avg_sentiment,
        word_count=word_count,
        comments=comment_length,
        sites=sites_searched
    )

    db.session.add(search_db_entry)
    db.session.commit()

    # Build a time -> sentiment map and a chronologically sorted list of
    # previous searches for the same term.
    query_history = UserSearch.query.filter_by(search=search.strip()).all()
    query_history_sentiment = {query.time: query.avg_sentiment for query in query_history}
    sorted_query_history = sorted(query.time for query in query_history)

    output_dict = {
        "urls": urls,
        "avg_sentiment": avg_sentiment,
        "word_count": word_count,
        "comments": comment_length,
        "sample": sample,
        "sites": sites_searched,
        "query_history": sorted_query_history,
        "query_history_sentiment": query_history_sentiment
    }

    return output_dict
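The snippet assumes a UserSearch model and a db session in the Flask-SQLAlchemy style. A rough sketch of the columns the code reads and writes, assuming Flask-SQLAlchemy; the column types are guesses, not taken from the original project:

from datetime import datetime
from flask_sqlalchemy import SQLAlchemy

db = SQLAlchemy()

class UserSearch(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    search = db.Column(db.String(256))
    urls = db.Column(db.PickleType)        # list of result URLs
    avg_sentiment = db.Column(db.Float)
    word_count = db.Column(db.Integer)
    comments = db.Column(db.Integer)
    sites = db.Column(db.PickleType)       # [reddit, twitter, news] flags
    time = db.Column(db.DateTime, default=datetime.utcnow)  # read back as query.time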
Example no. 4
    print("collection deleted")


"""
def static_data(name_collection,name_collection_static_data):
	print("hello")
	mycol = mydb[name_collection]
	mycol_static_data = mydb[name_collection_static_data]
	for d1 in mycol.find():
		if(check_data_duplicate_db_by_name(d1,"data_static")==True):
			mydict={"name":d1["name"],"total":0}
			insert_document(mydict,"data_static")
			
"""

# Scrape the search results and store each new entry in the
# "application_conccurent" collection with an auto-incremented id.
list_site = ss.search_google("Hébergement Web tunisie")
list_name = ss.get_name(list_site)
datas = ss.get_data(list_site, list_name)
for xx in datas:
    the_last_id = show_the_last_id("application_conccurent")
    xx['id'] = the_last_id + 1
    if not check_data_duplicate_db(xx, "application_conccurent"):
        insert_document(xx, "application_conccurent")
"""
##insert_document_list(datas,"application_conccurent")



##the_last_id=show_the_last_id("application_conccurent")

##delete_all_doc_in_coll("application_conccurent")
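This script leans on show_the_last_id, check_data_duplicate_db, and insert_document, which are defined elsewhere in the project. A rough sketch of what they might look like on top of pymongo; the connection, database name, and duplicate rule are assumptions:

from pymongo import MongoClient

mydb = MongoClient()["mydb"]  # hypothetical connection and database name

def show_the_last_id(name_collection):
    # assume documents carry an integer "id"; return the highest one, or 0 when empty
    last = mydb[name_collection].find_one(sort=[("id", -1)])
    return last["id"] if last else 0

def check_data_duplicate_db(doc, name_collection):
    # assume a document counts as a duplicate when a record with the same "name" already exists
    return mydb[name_collection].find_one({"name": doc["name"]}) is not None

def insert_document(doc, name_collection):
    mydb[name_collection].insert_one(doc)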
Example no. 5
def search(keywords):
    # Thin wrapper around the scraping helpers: run the Google search and
    # return the collected data unchanged.
    list_site = ss.search_google(keywords)
    list_name = ss.get_name(list_site)
    tasks = ss.get_data(list_site, list_name)
    return tasks
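Called like the other snippets on this page, the wrapper could be used as follows; the query string is only an illustration taken from Example no. 1:

results = search("Hébergement Web tunisie")
for entry in results:
    print(entry)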