Ejemplo n.º 1
0
def main():
    """Command-line entry point: parse the flags and run every requested action.

    Each option takes a single string value. The actions are independent
    and run in a fixed order (crawl, find, get, delete) for every flag
    that was supplied on the command line.
    """
    # (short flag, long flag, help text), registered in this order.
    option_specs = (
        ("-g", "--get",
         "Print the document in database if exists, otherwise crawl it and then print"),
        ("-c", "--crawl", "Crawl one's total answer pictures"),
        ("-f", "--find", "Print the document in database"),
        ("-d", "--delete", "delete one document from database"),
    )

    parser = argparse.ArgumentParser()
    for short_flag, long_flag, help_text in option_specs:
        parser.add_argument(short_flag, long_flag, type=str, help=help_text)
    options = parser.parse_args()

    connection = DBConnection()

    if options.crawl is not None:
        crawl_one(options.crawl, connection, COOKIES_STR)

    if options.find is not None:
        found = connection.find_one(options.find)
        pprint(found)

    if options.get is not None:
        document = get_one(options.get, connection, COOKIES_STR)
        pprint(document)

    if options.delete is not None:
        outcome = connection.delete_many(options.delete)
        print(outcome)
Ejemplo n.º 2
0
 def __init__(self):
     """Store the keyword list (as list and as set) and open a DB cursor."""
     keywords = ['modi', 'pm', 'visit', 'narendra', 'prime minister']
     self.bag_of_words = keywords
     # Set copy of the same keywords, kept alongside the ordered list.
     self.bag_of_words_set = set(keywords)

     connection = DBConnection().create_connection()
     self.db_con = connection
     self.cursor = connection.cursor()
Ejemplo n.º 3
0
def init():
    """Create the objects the rest of the program starts from.

    Returns:
        A ``(db_connection, url_token_set, origin)`` tuple: a fresh
        ``DBConnection``, the value of its ``restore_url_token()`` call,
        and an ``Author`` built from the ``ORIGIN_*`` constants and
        ``COOKIES_STR``.
    """
    print("Begin initialize...")

    connection = DBConnection()
    restored_tokens = connection.restore_url_token()

    seed_author = Author(
        ORIGIN_URL_TOKEN,
        COOKIES_STR,
        ORIGIN_NAME,
        ORIGIN_GENDER,
        ORIGIN_AVATAR_URL_TEMPLATE,
    )
    return connection, restored_tokens, seed_author
Ejemplo n.º 4
0
 def __init__(self):
     """Initialise scraper settings, the HTTP headers and a DB cursor."""
     self.year = 2015
     self.base_url = "http://timesofindia.indiatimes.com/%s"
     # No target URL yet; it starts out as None.
     self.url_to_scrape = None

     connection = DBConnection().create_connection()
     self.db_con = connection
     self.cursor = connection.cursor()

     user_agent = ("Mozilla/5.0 (X11; Linux x86_64) "
                   "AppleWebKit/537.36 (KHTML, like Gecko) "
                   "Chrome/55.0.2883.87 Safari/537.36")
     self.request_headers = {"User-Agent": user_agent}
Ejemplo n.º 5
0
def main():
    """Build a ``Loop`` over the parameter names below and start it.

    The loop is given a ``DBConnection("doublepull", 1)`` and the
    constant ``100`` as its remaining constructor arguments.
    """
    parameter_names = [
        "hero_movespeed",
        "hero_attackspeed",
        "hero_attackrange",
        "hero_attackdamage",
        "fraction_neutral_left",
        "neutral_total_eff_hp",
        "targeted_neutral_eff_hp",
        "fraction_lane_left",
        "lane_total_eff_hp",
        "targeted_lane_eff_hp",
        "damage_spread_neutral",
        "damage_spread_lane",
        "success",
    ]

    # Unused reference values kept from an earlier revision:
    # starting_weights = [0.0033, -0.85, -0.0015, 0.02, -4, -0.003, 0.003, 1.25, 0.0005, 0, 0, 0]
    database = DBConnection("doublepull", 1)
    Loop(parameter_names, database, 100).go()
Ejemplo n.º 6
0
async def get_account_by_message(
        request: web.BaseRequest) -> web.json_response:
    """Return the ten owners with the highest total engagement for a search.

    The ``from`` / ``to`` route parameters bound the ``time`` column and the
    ``q`` query parameter carries a space-separated keyword expression of
    the form ``word [and|or word]...``.  The first word and every word
    introduced by ``and`` must all occur in the message; a word introduced
    by ``or`` is an alternative match.  Matching is case-insensitive
    substring matching.

    Keywords are sent to the database as bind parameters, never spliced
    into the SQL text, and the whole keyword expression is parenthesised so
    that the date range always applies (previously an ``OR`` keyword
    matched rows outside the requested range).

    Raises:
        web.HTTPBadRequest: the ``q`` query parameter is missing.
        web.HTTPNotFound: no row matched.
    """
    from_date = request.match_info.get('from')
    to_date = request.match_info.get('to')

    raw_query = request.query.get('q')
    if raw_query is None:
        raise web.HTTPBadRequest(text='query parameter q is required')
    key_word = raw_query.split(' ')

    # The first token is always a required keyword; the rest are read as
    # (operator, term) pairs.  A dangling operator with no term is ignored.
    and_list = [key_word.pop(0)]
    or_list = []
    for operator, term in zip(key_word[::2], key_word[1::2]):
        operator = operator.lower()
        if operator == 'and':
            and_list.append(term)
        elif operator == 'or':
            or_list.append(term)

    # $1 and $2 are the date bounds; each keyword gets the next placeholder.
    params = [arrow.get(from_date).datetime, arrow.get(to_date).datetime]

    def like_clause(term):
        """Register ``term`` as a bind parameter and return its LIKE test."""
        params.append(f"%{term.lower()}%")
        return f"lower(message) like ${len(params)}"

    and_clause = " AND ".join([like_clause(term) for term in and_list])
    or_clauses = [like_clause(term) for term in or_list]
    keyword_filter = " OR ".join([f"({and_clause})", *or_clauses])

    query = f"""
        SELECT owner_id, owner_name, sum(engagement) as total_engagement, array_agg(id) as id_list
        FROM data
        WHERE time BETWEEN $1 AND $2
        AND ({keyword_filter})
        GROUP BY owner_id,owner_name
        ORDER BY total_engagement DESC
        limit 10
    """
    print(query)

    async with DBConnection(request) as connection, connection.transaction(
            isolation='serializable'):
        result = await connection.fetch(query, *params)
        if not result:
            raise web.HTTPNotFound(text='daily message not found')

        payload = []
        for record in result:
            item = dict(record)
            # Replace raw message ids with the URL of each message resource.
            item['id_list'] = [
                reverse('message', msg_id=msg) for msg in item['id_list']
            ]
            payload.append(item)
        return web.json_response(payload)
Ejemplo n.º 7
0
def display_vist_details():
    """Render ``index.html`` with a place -> visit date mapping.

    The mapping is read from the ``visit_info`` table; when a place occurs
    more than once the last row returned wins.
    """
    connection = DBConnection().create_connection()
    cur = connection.cursor()
    cur.execute("select place, visit_date from visit_info")

    visits = {place: visit_date for place, visit_date in cur.fetchall()}
    return render_template('index.html', vist_details=visits)
Ejemplo n.º 8
0
def hello_world():
    """Return the ``visit_info`` table as JSON, keyed by place.

    Each key is a place and each value its visit date; when a place occurs
    more than once the last row returned wins.
    """
    connection = DBConnection().create_connection()
    cur = connection.cursor()
    cur.execute("select place, visit_date from visit_info")

    visits = {place: visit_date for place, visit_date in cur.fetchall()}
    return jsonify(visits)
Ejemplo n.º 9
0
def main():
    """Batch unprocessed listing ids and hand the batches to a thread pool.

    Records are read from the configured database/collection.  The
    ``listingId`` of every record not flagged ``is_processed`` is collected
    into batches of ``urls_per_call`` ids.  Collection stops early once
    ``max_api_calls`` full batches exist; otherwise the trailing partial
    batch (possibly empty) is queued as well and ``db.check_listings()``
    is called after the pool has finished.
    """
    max_api_calls = api["max_api_calls"]
    urls_per_call = api["urls_per_call"]

    db = DBConnection(database["host"], database["port"])
    db.set_database(database["name"])
    db.set_collection(database["collection"])

    batches = []
    current_batch = []
    hit_call_limit = False

    for record in db.get_records():
        # A record missing any key on the path counts as unprocessed.
        try:
            already_processed = (
                record["offer"]["listing"]["is_processed"] == True  # noqa: E712
            )
        except KeyError:
            already_processed = False

        if not already_processed:
            current_batch.append(record["offer"]["listing"]["listingId"])
            if len(current_batch) == urls_per_call:
                batches.append(current_batch)
                current_batch = []

        # One full batch corresponds to one API call.
        if len(batches) >= max_api_calls:
            hit_call_limit = True
            break

    if not hit_call_limit:
        # All records were scanned: queue whatever is left over.
        batches.append(current_batch)

    with concurrent.futures.ThreadPoolExecutor() as executor:
        executor.map(process_listing_metric_data, batches, repeat(db),
                     repeat(urls_per_call))

    if not hit_call_limit:
        db.check_listings()
Ejemplo n.º 10
0
async def get_message_by_id(request: web.BaseRequest) -> web.json_response:
    """Return the ``data`` row whose id equals the ``msg_id`` route parameter.

    The row is returned as a JSON object with its ``time`` field rendered
    through ``isoformat()``.

    Raises:
        web.HTTPNotFound: no row has that id.
    """
    sql = """
            SELECT * 
            FROM data 
            WHERE id = $1
        """
    wanted_id = request.match_info.get('msg_id')

    async with DBConnection(request) as connection, connection.transaction(
            isolation='serializable'):
        row = await connection.fetchrow(sql, wanted_id)
        if not row:
            raise web.HTTPNotFound(text='message not found')

        body = dict(row)
        body['time'] = body['time'].isoformat()
        return web.json_response(body)
Ejemplo n.º 11
0
async def get_daily_message_count(
        request: web.BaseRequest) -> web.json_response:
    """Return the number of messages per day between two route dates.

    The response is a JSON object built from the ``(date, count)`` rows of
    the query, with dates formatted as ``YYYY-MM-DD``.

    Raises:
        web.HTTPNotFound: the range contains no messages.
    """
    sql = """
            SELECT to_char(date_trunc('day', time), 'YYYY-MM-DD') as date, count(*) 
            FROM data 
            WHERE time BETWEEN $1 AND $2 
            GROUP BY date 
            ORDER BY date ASC
        """
    range_start = request.match_info.get('from')
    range_end = request.match_info.get('to')

    async with DBConnection(request) as connection, connection.transaction(
            isolation='serializable'):
        rows = await connection.fetch(
            sql,
            arrow.get(range_start).datetime,
            arrow.get(range_end).datetime,
        )
        if not rows:
            raise web.HTTPNotFound(text='daily message not found')

        # Each row is a (date, count) pair, so dict() maps date -> count.
        counts_by_day = dict(rows)
        return web.json_response(counts_by_day)
Ejemplo n.º 12
0
async def get_word_cloud(request: web.BaseRequest) -> web.FileResponse:
    """Serve a word-cloud PNG for the messages between two route dates.

    The ``cloud_type`` route parameter selects what is counted
    (``wordcloud``, ``hashtag`` or ``mention``); any other value falls back
    to ``wordcloud``.  The image is stored under ``settings.MEDIA_PATH``
    with a name derived from the type and both dates, and an existing file
    is served directly without touching the database.

    Raises:
        web.HTTPNotFound: the range contains no messages.
    """
    from_date = request.match_info.get('from')
    to_date = request.match_info.get('to')
    cloud_type = request.match_info.get('cloud_type')
    if cloud_type not in ('wordcloud', 'hashtag', 'mention'):
        cloud_type = 'wordcloud'

    # Single source of truth for the cache file location.
    image_path = (
        f"{settings.MEDIA_PATH}/{cloud_type}__{from_date}__{to_date}.png")
    if os.path.exists(image_path):
        return web.FileResponse(image_path)

    async with DBConnection(request) as connection, connection.transaction(
            isolation='serializable'):
        result = await connection.fetch(
            """
            SELECT message
            FROM data 
            WHERE time BETWEEN $1 AND $2
        """,
            arrow.get(from_date).datetime,
            arrow.get(to_date).datetime)
        if not result:
            raise web.HTTPNotFound(text='daily message not found')
        dictionary = await get_count_by_list(result, cloud_type)

        wc = WordCloud(
            background_color="white",
            width=1000,
            height=1000,
            font_path='THSarabunNew.ttf',
            relative_scaling=0.5,
        )
        wc.generate_from_frequencies(dictionary)
        try:
            plt.imshow(wc, interpolation='bilinear')
            plt.axis("off")
            plt.savefig(image_path, format="png")
        finally:
            # pyplot keeps the current figure alive between calls; close it
            # so figures do not pile up and the next request starts clean.
            plt.close()
        return web.FileResponse(image_path)
Ejemplo n.º 13
0
async def get_message_by_engagement(
        request: web.BaseRequest) -> web.json_response:
    """Return the ten highest-engagement messages between two route dates.

    Each entry of the JSON array carries ``engagement``, ``time`` (as text)
    and ``message``.

    Raises:
        web.HTTPNotFound: the range contains no messages.
    """
    sql = """
            SELECT engagement, time::TEXT, message
            FROM data 
            WHERE time BETWEEN $1 AND $2 
            ORDER BY engagement DESC 
            limit 10
        """
    range_start = request.match_info.get('from')
    range_end = request.match_info.get('to')

    async with DBConnection(request) as connection, connection.transaction(
            isolation='serializable'):
        rows = await connection.fetch(
            sql,
            arrow.get(range_start).datetime,
            arrow.get(range_end).datetime,
        )
        if not rows:
            raise web.HTTPNotFound(text='daily message not found')
        return web.json_response([dict(row) for row in rows])
Ejemplo n.º 14
0
    async def gs(self,
                 ctx,
                 start: typing.Optional[int] = 1,
                 num: typing.Optional[int] = 1,
                 *,
                 arg,
                 skip_cache=False,
                 delete=False):
        '''Searches the phrase given on google'''
        # Flow: validate the bounds, look the phrase up in the local
        # 'meme_cache.db' cache, otherwise query GoogleImagesSearch, store
        # the results in the cache and post them through the channel webhook.
        #
        # start: index of the first result, must be within 1..200.
        # num:   number of results requested, must be within 1..10.
        # arg:   the search phrase (keyword-only).
        # skip_cache / delete: see the notes at the places they are read.

        if not (start >= 1 and start <= 200 and num >= 1 and num <= 10):
            await ctx.send('Numbers not in bound')
            return

        # sanitization
        # NOTE(review): `i.isalnum` is referenced but never called, so the
        # condition is always truthy and every character of `arg` is kept.
        # Calling it would also drop spaces, which are not in the allowed
        # set - confirm the intended whitelist before changing this.
        searchTerm = ''
        for i in arg:
            if i.isalnum or i in "'+.:":
                searchTerm += i
        # No-op on a str: joining its characters yields the same string.
        searchTerm = ''.join(searchTerm)

        gis = GoogleImagesSearch(secret_api_key, secret_cx_code)

        _search_params = {
            'q': searchTerm,
            'start': start,
            'num': num,
            'safe': 'high',
        }

        webhook = await get_web_hook(ctx.channel)

        with DBConnection('meme_cache.db') as db_conn:
            cur = db_conn.conn.cursor()

            # The cache is bypassed only when skip_cache is set AND num == 1.
            if (not skip_cache) or (num != 1):
                # NOTE(review): the SQL text is produced with `%` string
                # formatting from `searchTerm`, which is effectively
                # unsanitised (see above) - injection risk; bound
                # parameters would need the query templates changed too.
                for row in cur.execute(db_conn.select_query %
                                       (searchTerm, start)):
                    url = row[0]
                    if url:
                        embed = discord.Embed()
                        embed.set_image(url=url)
                        # `delete` only decides whether the original message
                        # text is echoed; the message is deleted either way.
                        if not delete:
                            await webhook.send(
                                content=ctx.message.content,
                                embed=embed,
                                username=ctx.message.author.nick,
                                avatar_url=ctx.message.author.avatar_url)
                        else:
                            await webhook.send(
                                embed=embed,
                                username=ctx.message.author.nick,
                                avatar_url=ctx.message.author.avatar_url)
                        await ctx.message.delete()
                    else:
                        await ctx.send('Couldn\'t find the searched image.')
                    # Only the first cached row is ever used: the function
                    # returns from inside the loop. With no rows at all,
                    # execution falls through to the live search below.
                    return

            gis.search(search_params=_search_params)
            embeds = []
            for i, img in enumerate(gis.results()):
                if img.url:
                    embed_data = {
                        'type': 'image',
                        'image': {
                            'url': img.url,
                        },
                    }
                    embeds.append(discord.Embed.from_dict(embed_data))

                # The cache row is written for every result, including ones
                # whose url is falsy (the try is outside the `if` above).
                try:
                    cur.execute(db_conn.insert_query %
                                (searchTerm, start + i, img.url))
                    db_conn.conn.commit()
                except sqlite3.IntegrityError:
                    # Row already cached: overwrite it instead.
                    # NOTE(review): no commit follows this update here -
                    # confirm that leaving the `with` block commits it.
                    cur.execute(db_conn.update_query %
                                (img.url, start + i, searchTerm))

        # Unlike the cache-hit path, this path posts under display_name
        # rather than nick.
        if not delete:
            cont = ctx.message.content
            await webhook.send(content=cont,
                               embeds=embeds,
                               username=ctx.message.author.display_name,
                               avatar_url=ctx.message.author.avatar_url)
        else:
            await webhook.send(embeds=embeds,
                               username=ctx.message.author.display_name,
                               avatar_url=ctx.message.author.avatar_url)
        await ctx.message.delete()
Ejemplo n.º 15
0
# -*- coding=utf8 -*-

from flask import Flask, request, send_from_directory, jsonify
import zhihutu
from database import DBConnection

# Flask application object; static_url_path is set to the empty string.
app = Flask(__name__, static_url_path='')
# One DBConnection created at import time and used by the request handlers.
db_connection = DBConnection()


@app.route('/')
def index():
    """Serve ``index.html`` from the current directory for the root URL."""
    page_dir, page_name = '.', 'index.html'
    return send_from_directory(page_dir, page_name)


@app.route('/find', methods=['POST'])
def find():
    """Look a document up by ``url_token`` and return it as JSON.

    Form fields:
        url_token: required; identifies the document.
        cookies_str: optional; when given and the document is not in the
            database, it is passed to ``zhihutu.get_one`` to fetch it.

    Returns ``{'name': 'Not Found'}`` when the document is not stored and
    cannot be fetched; otherwise the document without its ``_id`` field.
    """
    url_token = request.form['url_token']
    # Really optional now: indexing with [] rejected requests that simply
    # omitted the field.
    cookies_str = request.form.get('cookies_str', '')

    result = db_connection.find_one(url_token)
    if result is None:
        if not cookies_str:
            return jsonify({'name': 'Not Found'})
        result = zhihutu.get_one(url_token, db_connection, cookies_str)
        # Defensive: get_one's return contract is not visible here, so a
        # None result is treated as "not found" instead of crashing below.
        if result is None:
            return jsonify({'name': 'Not Found'})

    # Drop the database id before serialising, if there is one.
    result.pop('_id', None)

    return jsonify(result)
Ejemplo n.º 16
0
    'dec'
]
regex_list = ['']

country_capital_list = []

# One capital name per line.  The trailing newline must be stripped,
# otherwise each name only matches text that is followed by a line break;
# blank lines are skipped because an empty alternative matches everywhere.
with open('/home/aish/Desktop/capital.txt', 'r') as f:
    lines = f.readlines()
    for capital_name in lines:
        capital_name = capital_name.strip()
        if capital_name:
            country_capital_list.append(capital_name)

country_capital_list.extend(COUNTRY)
# Single lower-cased alternation over every capital and COUNTRY entry.
regex = "|".join(str(item.lower()) for item in country_capital_list)

# Parameterised statement for writing a counter back to an article row.
sql = "UPDATE classified_article SET counter = %s WHERE id = %s"
dbcon = DBConnection().create_connection()
cursor = dbcon.cursor()
# Load every article whose counter is above 3, in ascending id order.
cursor.execute(
    "SELECT id,article,counter FROM classified_article WHERE id > 0 AND counter >3 ORDER BY id asc"
)
result_set = cursor.fetchall()

frequency_dict = {}
for result in result_set:
    (id, content, filter_count) = result
    #print (id)
    content = content.lower()
    content = re.sub("<.*?\>", " ", content)
    content = re.sub('[,.]', '', content)
    '''
    #Stage 1