Beispiel #1
0
def complain():
    """Process an uploaded 'video' file and rewrite the raw keyword list.

    When the upload is present and accepted by ``allowed_file``, the file is
    saved under ``VIDEO_FOLDER/<basename>``, the ``BASH_TEST`` command is run
    with that basename appended, and ``decode_dnn/log/decode.1.log`` is read
    back.  ASCII letters, digits and the listed punctuation characters are
    removed from the log; every remaining space-separated token longer than
    three characters is passed to ``Keyword.add``.

    Whether or not a file was processed, ``kwsdatadir/raw_keywords.txt`` is
    rewritten from ``Keyword.high_rate_words()``.

    Returns:
        The literal string ``'success'``.
    """
    upload = request.files['video']
    if upload and allowed_file(upload.filename):
        filename = secure_filename(upload.filename)
        basename = filename.rsplit('.', 1)[0]
        filepath = current_app.config['VIDEO_FOLDER'] + '/' + basename
        upload.save(os.path.join(filepath, filename))
        # NOTE(review): the command is a shell string; the appended part has
        # been through secure_filename() above -- keep it that way.
        p = subprocess.Popen(current_app.config['BASH_TEST'] + basename,
                             shell=True)
        # poll() is None only while the child is running.  Exit status 0 is
        # falsy too, so test against None explicitly or a successful run
        # would keep this loop spinning forever.
        while p.poll() is None:
            gevent.sleep(1)
        with open(filepath + "/decode_dnn/log/decode.1.log") as f:
            s = f.read()
        r = re.sub(
            "[A-Za-z0-9\[\`\~\!\@\#\$\^\&\*\(\)\=\|\{\}\'\:\;\'\,\[\]\.\<\>\/\?\~\@\#\&\*\%\-\_\"\+]",
            "", s)
        # strip() already trims both ends.
        r = r.strip()
        for k in r.split(" "):
            if len(k) > 3:
                Keyword.add(k)
    with open(
            current_app.config['VIDEO_FOLDER'] +
            '/kwsdatadir/raw_keywords.txt', 'w+') as f:
        for i in Keyword.high_rate_words():
            f.write(i.encode('utf8') + '\n')
    return 'success'
Beispiel #2
0
 def test_calc_dists(self):
     """Compare calc_dists('arma', ...) with the pinned float32 distances."""
     words = ('munição', 'pistola', 'espingarda')
     candidates = [Keyword(word=w, requests=[]) for w in words]
     distances = self.classifier.calc_dists('arma', candidates)
     expected = np.array([0.677431, 0.6866069, 0.71047467], dtype='float32')
     # Element-wise exact comparison; every position has to match.
     matches = distances == expected
     self.assertTrue(matches.all())
Beispiel #3
0
def index():
    """
    Keyword management home page: list site-level keywords with simple
    filtering and pagination.

    Query-string parameters read from the request:
        status  -- comma-separated statuses (default 'bare,processed,repeated')
        keyword -- exact keyword name to match (optional)
        owner   -- owner to match (optional)
        page    -- 1-based page number (default 1)

    Renders 'seo/index.html' with the matching keywords, sorted by
    'baiduIndex' descending, and a Pagination object.
    """
    s = request.args.get('status', u'bare,processed,repeated')
    k = request.args.get('keyword', '')
    o = request.args.get('owner', '')
    # type=int falls back to the default on a non-numeric value instead of
    # raising; clamp to 1 so the computed skip can never be negative.
    p = max(request.args.get('page', 1, type=int), 1)
    start = (p - 1) * PAGE_COUNT

    condition = {'level': KeywordLevel.SITE}
    if k:
        condition['name'] = k.strip()
    if o:
        condition['owner'] = o.strip()
    # split() never returns an empty list, so drop empty entries to make the
    # "no status filter" guard below meaningful (e.g. for "?status=").
    status = [item for item in s.split(u',') if item]
    if status:
        condition['status'] = {'$in': status}

    count = Keyword.count(condition)
    cursor = Keyword.find(condition, skip=start, limit=PAGE_COUNT, sort=[('baiduIndex', pymongo.DESCENDING)])
    keywords = []
    for c in cursor:
        set_index(c)
        keywords.append(c)
    pagination = Pagination(p, PAGE_COUNT, count)
    return render_template('seo/index.html', keywords=keywords, pagination=pagination)
Beispiel #4
0
def fetch_keywords():
    """
    Fetch the site-level keywords and insert the new ones into the database.

    Every anchor under div.listap on the '/girls/all/' page is treated as one
    keyword, named by its 'title' attribute.  Names that Keyword.find_one
    already knows are skipped; the rest are saved with level
    KeywordLevel.SITE.  Returns the list of all names seen, new or not.
    """
    ss.headers['Referer'] = ym
    response = ss.get(ym + '/girls/all/')
    response.encoding = 'gbk'
    anchors = html.fromstring(response.text).xpath('//div[@class="listap"]/a')
    print('Found %s keywords' % len(anchors))

    names = []
    for anchor in anchors:
        message = 'Processing keyword %s' % anchor.get('title')
        text = anchor.get('title').strip()
        existing = Keyword.find_one({'name': text})
        name = unicode(text)
        if not existing:
            record = Keyword()
            record.name = name
            record.level = KeywordLevel.SITE
            record.refer = u'%s%s' % (ym, anchor.get('href'))
            record.save()
        else:
            message += ', skipped as existing'
        print(message)
        names.append(name)

    return names
Beispiel #5
0
def test_base(session, db):
    """Exercise create / read / update / delete on Request, Keyword and Label.

    One Label owning one Keyword, which in turn references one Request, is
    persisted; the assertions then check row counts, attribute reads, updates
    and finally that deleting the Label leaves the Request and Keyword rows
    queryable.
    """
    request = Request(id=13, url='http://www.google.com', status='done')
    keyword = Keyword(word='carros', requests=[request])
    label = Label(name='veículos', restrict=True, keywords=[keyword])

    db.create_all()
    db.session.add(label)
    db.session.commit()

    # test create
    assert session.query(Request).count() == 1
    assert session.query(Keyword).count() == 1
    assert session.query(Label).count() == 1

    # test read
    assert Request.query.filter_by(
        id=13).first().url == "http://www.google.com"
    assert Keyword.query.filter_by(word='carros').first().requests[0].id == 13
    # Truthiness check, mirroring the `assert not ...restrict` used below.
    assert Label.query.filter_by(name='veículos').first().restrict

    # test update
    request.url = "http://www.twitch.tv"
    db.session.commit()
    assert Request.query.filter_by(id=13).first().url == "http://www.twitch.tv"
    label.restrict = False

    assert not Label.query.filter_by(name='veículos').first().restrict

    # test delete
    db.session.delete(label)
    db.session.commit()

    # Identity comparison is the idiomatic test for "no row found".
    assert Label.query.filter_by(name='veículos').first() is None
    assert Request.query.filter_by(id=13).first().url == "http://www.twitch.tv"
    assert Keyword.query.filter_by(word='carros').first().requests[0].id == 13
Beispiel #6
0
def hearsay(keyword_id):
    """
    Edit the article attached to a keyword.

    GET renders the edit page.  Any other method reads 'title' and 'body'
    from the form, rejects empty values with a JSON error, stores them on
    keyword.hearsay and saves the keyword.  The first time an article is
    stored the keyword status becomes KeywordStatus.PROCESSED and, outside
    debug mode, notify_baidu is called.
    """
    keyword = Keyword.find_one({'_id': keyword_id})
    if not keyword:
        abort(404)

    # Plain page view.
    if request.method == 'GET':
        return render_template('seo/hearsay.html', keyword=keyword)

    # Everything below handles the form submission.
    current_app.logger.info('Try to save hearsay for keyword %s/%s' % (keyword._id, keyword.name))
    new_title = request.form.get('title', '')
    new_body = request.form.get('body', '')
    if not new_title:
        return jsonify(success=False, message='文章标题不能为空!')
    if not new_body:
        return jsonify(success=False, message='文章内容不能为空!')

    # Must be evaluated before the fields are filled in below.
    first_article = not keyword.hearsay
    keyword.hearsay.title = new_title
    keyword.hearsay.body = new_body
    keyword.updateTime = datetime.now()
    if first_article:
        keyword.status = KeywordStatus.PROCESSED

    keyword.save()

    if first_article and not current_app.debug:
        notify_baidu(current_app._get_current_object(), keyword._id)

    return jsonify(success=True, message='成功保存了你的文章。')
Beispiel #7
0
def upfile():
    """Process an uploaded 'video' file and report whether a keyword was hit.

    The upload is stored under ``VIDEO_FOLDER/<basename>/video`` (any existing
    ``<basename>`` folder is removed first) and the ``BASH_TEST`` command is
    run with the basename appended.  A hit is reported when either
    ``kwsdir/result.xml`` exists afterwards, or one of the space-separated
    tokens left in ``decode_dnn/log/decode.1.log`` (after removing ASCII
    letters, digits and the listed punctuation) is in
    ``Keyword.high_rate_words()``.  On a hit, ``'dangercalling'`` is put on
    ``msg[familyid]``, where ``familyid`` is looked up from the request's
    ``elderid`` via ``bind_list``.

    Returns:
        ``'find'`` on a hit, otherwise ``'notfind'``.
    """
    upload = request.files['video']
    elderid = request.values['elderid']
    familyid = bind_list.get(elderid)
    print(upload.filename)
    if upload and allowed_file(upload.filename):
        filename = secure_filename(upload.filename)
        basename = filename.rsplit('.', 1)[0]
        filepath = current_app.config['VIDEO_FOLDER'] + '/' + basename
        if os.path.exists(filepath):
            delete_file_folder(filepath)
        os.makedirs(filepath + '/video')
        upload.save(os.path.join(filepath + '/video', filename))
        # NOTE(review): the command is a shell string; the appended part has
        # been through secure_filename() above -- keep it that way.
        p = subprocess.Popen(current_app.config['BASH_TEST'] + basename,
                             shell=True)
        # poll() is None only while the child is running.  Exit status 0 is
        # falsy too, so test against None explicitly or a successful run
        # would keep this loop spinning forever.
        while p.poll() is None:
            gevent.sleep(1)
        if os.path.isfile(filepath + '/kwsdir/result.xml'):
            msg[familyid].put('dangercalling')
            return 'find'
        with open(filepath + "/decode_dnn/log/decode.1.log") as f:
            s = f.read()
        r = re.sub(
            "[A-Za-z0-9\[\`\~\!\@\#\$\^\&\*\(\)\=\|\{\}\'\:\;\'\,\[\]\.\<\>\/\?\~\@\#\&\*\%\-\_\"\+]",
            "", s)
        # strip() already trims both ends.
        r = r.strip()
        klist = Keyword.high_rate_words()
        for k in r.split(" "):
            if k in klist:
                msg[familyid].put('dangercalling')
                return 'find'
    return 'notfind'
Beispiel #8
0
    def test_keywords_database(self):
        ''' test additions to keywords database '''
        u1 = User(username='******', email='*****@*****.**')

        db.session.add(u1)
        db.session.commit()

        # The id is only read after the commit above.
        current_user_id = u1.id

        keywords1 = Keyword(body = 'This is a test', timestamp = '20200417', id_user = current_user_id, user_id = current_user_id)

        db.session.add(keywords1)
        db.session.commit()

        # Assert that the keywords row is assigned to the user, and that each
        # individual keyword is included in the stored body.
        # (Comparison must be `==`; a single `=` here is a syntax error.)
        assert keywords1.id_user == current_user_id
        list_keywords = keywords1.body.split(' ')
        for item in list_keywords:
            assert item in keywords1.body

        db.session.delete(u1)
        db.session.commit()

        db.session.delete(keywords1)
        db.session.commit()
 def setUp(self):
     """Build the Keyword fixture stored on ``self.new_keyword``."""
     # Positional constructor arguments, in the order Keyword receives them.
     # NOTE(review): the field each position maps to is defined by
     # Keyword.__init__, which is not visible here.
     fixture_args = (
         'Stan Schroeder',
         'Your trusted source for breaking news, analysis, exclusive interviews, headlines, and videos at ABCNews.com.',
         'https://abcnews.go.com',
         '2020-05-16T00:34:00Z',
         'https://mashable.com/article/bitcoin-halving-2020/',
     )
     self.new_keyword = Keyword(*fixture_args)
Beispiel #10
0
def refresh(keyword_id):
    """
    Refresh the long-tail keywords of one specific keyword.

    Responds with 404 when the keyword does not exist; otherwise runs
    analyze_keyword for it and returns a JSON success message.
    """
    target = Keyword.find_one({'_id': keyword_id})
    if not target:
        abort(404)

    # Hand the real application object (not the proxy) to the analyzer.
    app = current_app._get_current_object()
    analyze_keyword(app, target)
    return jsonify(success=True, message='成功触发了刷新请求,请稍候查看最新数据。')
Beispiel #11
0
def analyze_keyword(app, keyword):
    """
    Analyze a site-level keyword: fetch its Baidu index and its related
    long-tail keywords.  The data is currently scraped from 5118.

    Each <dl> row of the result list (except rows with class 'dl-word')
    yields a name, a Baidu index and a Baidu result count; non-numeric
    values count as 0.  The row whose name equals keyword.name updates the
    keyword itself; every other row creates or updates a long-tail Keyword
    whose parentId is this keyword.
    """
    app.logger.info('Try to analyze keyword %s/%s' % (keyword._id, keyword.name))

    ss.headers['Referer'] = 'http://www.5118.com/'
    page_text = ss.get('http://www.5118.com/seo/words/%s' % url_quote(keyword.name)).text
    rows = html.fromstring(page_text).xpath('//div[@class="Fn-ui-list dig-list"]/dl')
    row_count = len(rows)

    for row in rows:
        if row.get('class', '') == 'dl-word':
            continue

        name = unicode(row.xpath('./dd[1]//a[1]/@title')[0].strip())
        index_text = row.xpath('./dd[2]/text()')[0].strip()
        result_text = row.xpath('./dd[3]/text()')[0].strip()
        # Keep the scraped text for logging; fall back to 0 when not numeric.
        index_raw = index_text if index_text.isdigit() else 0
        result_raw = result_text if result_text.isdigit() else 0
        app.logger.info('Found keyword: %s/%s/%s' % (name, index_raw, result_raw))

        is_self = name == keyword.name
        if is_self:
            record = keyword
        else:
            record = Keyword.find_one({'name': name})
            if not record:
                record = Keyword()
                record.name = name
                record.level = KeywordLevel.LONG_TAIL
                record.parentId = keyword._id

        record.baiduIndex = int(index_raw)
        record.baiduResult = int(result_raw)
        # Only the analyzed keyword itself carries the row total.
        if is_self and row_count > 2:
            record.total = row_count - 2
        record.save()
Beispiel #12
0
def create_keyword(title):
    """Return the Keyword row for *title*, creating and committing it if absent.

    Args:
        title: raw title; it is normalised with ``utils.clean_data`` before
            the lookup.

    Returns:
        The existing or newly created ``Keyword`` instance.
    """
    title = utils.clean_data(title)
    print(title)
    print('create_keyword')
    # One SELECT is enough: first() returns None when no row matches, so a
    # separate count() round-trip is not needed.
    keyword = db.session.query(Keyword).filter(Keyword.title == title).first()
    if keyword is None:
        keyword = Keyword(title)
        db.session.add(keyword)
        db.session.commit()

    return keyword
def create_keyword():
    """Create a Keyword from the JSON request body and commit it.

    Reads 'keyword', 'is_excluded' and 'category_id' from the payload.
    Returns an APIResponseBuilder success response containing the new
    keyword, or an error response when anything raises.
    """
    try:
        # TODO: verify category belongs to passed user ID
        payload = request.json
        new_keyword = Keyword(
            keyword=payload['keyword'],
            is_excluded=payload["is_excluded"],
            category_id=payload['category_id'],
        )
        db.session.add(new_keyword)
        db.session.commit()
        response_body = {"keyword": new_keyword}
        return APIResponseBuilder.success(response_body)
    except SQLAlchemyError as db_error:
        return APIResponseBuilder.error(f"Issue running query: {db_error}")
    except Exception as error:
        return APIResponseBuilder.error(f"Error encountered: {error}")
Beispiel #14
0
def longtail(keyword_id):
    """
    List the long-tail keywords that belong to the given site keyword.

    Responds with 404 when the parent keyword does not exist.  Query-string
    parameters read from the request:
        status -- comma-separated statuses (default 'bare,processed,repeated')
        page   -- 1-based page number (default 1)

    Renders 'seo/longtail.html' with the parent keyword, its long-tail
    keywords sorted by 'baiduIndex' descending, and a Pagination object.
    """
    keyword = Keyword.find_one({'_id': keyword_id})
    if not keyword:
        abort(404)

    s = request.args.get('status', u'bare,processed,repeated')
    # type=int falls back to the default on a non-numeric value instead of
    # raising; clamp to 1 so the computed skip can never be negative.
    p = max(request.args.get('page', 1, type=int), 1)
    start = (p - 1) * PAGE_COUNT
    condition = {'level': KeywordLevel.LONG_TAIL, 'parentId': keyword_id}
    # split() never returns an empty list, so drop empty entries to make the
    # "no status filter" guard below meaningful (e.g. for "?status=").
    status = [item for item in s.split(u',') if item]
    if status:
        condition['status'] = {'$in': status}

    count = Keyword.count(condition)
    cursor = Keyword.find(condition, skip=start, limit=PAGE_COUNT, sort=[('baiduIndex', pymongo.DESCENDING)])
    keywords = list(cursor)
    pagination = Pagination(p, PAGE_COUNT, count)

    return render_template('seo/longtail.html', keyword=keyword, keywords=keywords, pagination=pagination)
Beispiel #15
0
def update_or_create_kws(words, req, db):
    """
    Insert or update rows of the Keyword table for the given words.

    A word that has no Keyword row yet gets a new one linked to ``req``;
    a word that already has one gets ``req`` appended to its requests.
    The session is committed after every word.

    # Input:
        - words (list): strings to insert or update; ``None`` is a no-op.
        - req (Request): the Request to associate with each word.
        - db (database): the app's database.
    """
    print("updating keywords: ", words)
    if words is None:
        return

    for word in words:
        existing = Keyword.query.filter_by(word=word).first()
        if existing is not None:
            existing.requests.append(req)
        else:
            db.session.add(Keyword(word=word, requests=[req]))

        db.session.commit()
Beispiel #16
0
def keywords():
    """Render the keywords form and store the current user's submission.

    On a valid submission the user's previous Keyword rows are deleted, one
    new row holding the submitted text is committed and a flash message is
    queued.  The 'keywords.html' template is rendered in every case, with
    ``keywords_chosen`` telling it whether a submission was just stored.
    """
    print("keyword begin")

    form = KeywordForm()
    submission_stored = False
    if form.validate_on_submit():
        owner_id = current_user.id
        # Replace, rather than extend, whatever the user had chosen before.
        Keyword.query.filter_by(id_user=owner_id).delete()
        record = Keyword(body=form.keywords.data, id_user=current_user.id)
        db.session.add(record)
        db.session.commit()
        flash('You have chosen your keywords!')
        submission_stored = True
        # NOTE(review): the redirect response is built but never returned, so
        # the template below is still rendered -- confirm this is intended.
        redirect(url_for('recommend'))

    print("returned to keywords")
    return render_template("keywords.html",
                           form=form,
                           keywords_chosen=submission_stored)
Beispiel #17
0
def import_data_for_user():
    """Import categories and their keywords from the JSON request body.

    Reads ``data["uuid"]`` and ``data["categories"]``; each category supplies
    a ``"name"`` and a list of ``"keywords"`` entries with ``"keyword"`` and
    ``"is_excluded"``.

    The whole import runs in a single transaction: either every category and
    keyword is stored, or (on any error) nothing is.

    Returns:
        An APIResponseBuilder success response, or an error response that
        carries the exception text.
    """
    data = request.json
    try:
        for category in data["categories"]:
            # Create new category
            cat = Category(user_id=data["uuid"],
                           category_name=category["name"])
            db.session.add(cat)
            # flush() sends the INSERT so cat.id is populated, without ending
            # the transaction the way a per-category commit would.
            db.session.flush()
            # add keywords
            for keyword in category["keywords"]:
                k = Keyword(keyword=keyword["keyword"],
                            is_excluded=keyword["is_excluded"],
                            category_id=cat.id)
                db.session.add(k)
        db.session.commit()
        return APIResponseBuilder.success({"success": True})
    except SQLAlchemyError as e:
        # Discard the partial import and leave the session usable.
        db.session.rollback()
        return APIResponseBuilder.error(f"Issue running query: {e}")
    except Exception as e:
        # Malformed payloads (missing keys, etc.) must not leave pending rows.
        db.session.rollback()
        return APIResponseBuilder.error(f"Error encountered: {e}")
Beispiel #18
0
def popdb():

    #adding tuples to db

    k1 = Keyword(name='green')
    k2 = Keyword(name='yellow')
    k3 = Keyword(name='red')
    k4 = Keyword(name='white')
    k5 = Keyword(name='tan')
    k6 = Keyword(name='orange')
    k7 = Keyword(name='brown')
    k8 = Keyword(name='black')
    k9 = Keyword(name='vegetable')
    k10 = Keyword(name='starch')
    k11 = Keyword(name='squash')
    k12 = Keyword(name='gourd')
    k13 = Keyword(name='legume')
    k14 = Keyword(name='maize')
    k15 = Keyword(name='spicy')
    k16 = Keyword(name='sweet')
    k17 = Keyword(name='tart')
    k18 = Keyword(name='leafy')
    k19 = Keyword(name='grain')
    k20 = Keyword(name='cereal')
    k21 = Keyword(name='cabbage')

    db.session.add_all([
        k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11, k12, k13, k14, k15, k16,
        k17, k18, k19, k20, k21
    ])
    db.session.commit()

    s1 = Supplier(name='Griggs Farm',
                  address='599 Boston Rd',
                  zipcode=1821,
                  city='Billerica',
                  state='MA')
    s2 = Supplier(name='Krochmal Farms',
                  address='31 Jennie\'s Way',
                  zipcode=1876,
                  city='Tewksbury',
                  state='MA')
    s3 = Supplier(name='Great Brook Dairy Farm',
                  address='247 North Rd',
                  zipcode=1741,
                  city='Carlisle',
                  state='MA')
    s4 = Supplier(name='Farmer Daves',
                  address='437 Parker Rd',
                  zipcode=1826,
                  city='Dracut',
                  state='MA')
    s5 = Supplier(name='Jones Farm',
                  address='246 Acton Rd',
                  zipcode=1824,
                  city='Chelmsford',
                  state='MA')
    s6 = Supplier(name='Swenson Farms',
                  address='50 Mill Rd',
                  zipcode=1862,
                  city='Chelmsford',
                  state='MA')
    s7 = Supplier(name='Drew Farm',
                  address='31 Tadmuck Rd',
                  zipcode=1886,
                  city='Westford',
                  state='MA')
    s8 = Supplier(name='Clark Farm',
                  address='185 Concord St',
                  zipcode=1741,
                  city='Carlisle',
                  state='MA')
    s9 = Supplier(name='Parlee Farm',
                  address='135 Pine Hill Rd',
                  zipcode=1824,
                  city='Chelmsford',
                  state='MA')
    s10 = Supplier(name='Wright-Locke Farm',
                   address='78 Ridge St',
                   zipcode=1890,
                   city='Winchester',
                   state='MA')
    s11 = Supplier(name='Indian Creek Farm',
                   address='1408 Trumansburg Rd',
                   zipcode=14850,
                   city='Ithaca',
                   state='NY')
    s12 = Supplier(name='Stick and Stone Farm',
                   address='1605 Trumansburg Rd',
                   zipcode=14850,
                   city='Ithaca',
                   state='NY')
    s13 = Supplier(name='RoseBarb Farms',
                   address='108 Landon Rd',
                   zipcode=14850,
                   city='Ithaca',
                   state='NY')
    s14 = Supplier(name='Three Swallows Farm',
                   address='23 Nelson Rd',
                   zipcode=14850,
                   city='Ithaca',
                   state='NY')
    s15 = Supplier(name='HoneyRock Farm',
                   address='271 Burns Rd',
                   zipcode=14850,
                   city='Ithaca',
                   state='NY')
    s16 = Supplier(name='Kingdom Farms',
                   address='317 Auburn Rd',
                   zipcode=14882,
                   city='Lansing',
                   state='NY')
    s17 = Supplier(name='Straw Pocket Farm',
                   address='1388 Ridge Rd',
                   zipcode=14882,
                   city='Lansing',
                   state='NY')
    s18 = Supplier(name='Dygert Farms',
                   address='260 Central Chapel Rd',
                   zipcode=14817,
                   city='Brooktondale',
                   state='NY')
    s19 = Supplier(name='TC3 Farm',
                   address='100 Cortland Rd',
                   zipcode=13053,
                   city='Dryden',
                   state='NY')

    db.session.add_all([
        s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16,
        s17, s18, s19
    ])
    db.session.commit()

    p1 = Produce(
        name='Brocolli',
        imageRef='https://cdn.mos.cms.futurecdn.net/r8NK24bmcMgSib5zWKKQkW.jpg'
    )
    p2 = Produce(
        name='Spinach',
        imageRef=
        'https://i.ndtvimg.com/i/2016-11/spinach_620x350_81477995047.jpg')
    p3 = Produce(
        name='Kale',
        imageRef=
        'https://post.healthline.com/wp-content/uploads/2020/09/benefits-of-kale-1200x628-facebook-1200x628.jpg'
    )
    p4 = Produce(
        name='Pumpkin',
        imageRef=
        'https://post.medicalnewstoday.com/wp-content/uploads/sites/3/2020/02/279610_2200-732x549.jpg'
    )
    p5 = Produce(
        name='Straightneck Squash',
        imageRef=
        'https://d1nw62gticy6e9.cloudfront.net/uploads/Early-Prolific-Straightneck-Squash-Seeds.jpg'
    )
    p6 = Produce(
        name='Zucchini',
        imageRef=
        'https://www.jessicagavin.com/wp-content/uploads/2018/05/zucchini-2-1200.jpg'
    )
    p7 = Produce(
        name='Green Beans',
        imageRef=
        'https://images.food52.com/mrPh1x9qA6lTYKO27QJEfDjZ4Y8=/2016x1344/filters:format(webp)/ff7b7650-cacd-42b4-947c-e2e8ba90fa2a--greenbeans.jpg'
    )
    p8 = Produce(
        name='Lentils',
        imageRef=
        'https://cdn.loveandlemons.com/wp-content/uploads/2019/12/how-to-cook-lentils.jpg'
    )
    p9 = Produce(
        name='Peas',
        imageRef=
        'https://www.almanac.com/sites/default/files/image_nodes/peas-and-pea-pods.jpg'
    )
    p10 = Produce(
        name='Potatoes',
        imageRef=
        'https://cdn.cheapism.com/images/081516_national_potato_day_recipes_slide_0_f.max-800x600.jpg'
    )
    p11 = Produce(
        name='Corn',
        imageRef=
        'https://www.simplyhappyfoodie.com/wp-content/uploads/2018/04/instant-pot-corn-on-the-cob-1-500x500.jpg'
    )
    p12 = Produce(
        name='Soybean',
        imageRef=
        'https://www.johnnyseeds.com/dw/image/v2/BBBW_PRD/on/demandware.static/-/Sites-jss-master/default/dw3c4875f3/images/products/vegetables/02553_01_tohya.jpg?sw=1120'
    )
    p13 = Produce(
        name='Oats',
        imageRef=
        'https://post.healthline.com/wp-content/uploads/2020/03/oats-oatmeal-732x549-thumbnail.jpg'
    )
    p14 = Produce(
        name='Barley',
        imageRef=
        'https://cdn-prod.medicalnewstoday.com/content/images/articles/295/295268/barley-grains-in-a-wooden-bowl.jpg'
    )
    p15 = Produce(
        name='Flour',
        imageRef=
        'https://www.world-grain.com/ext/resources/Article-Images/2020/05/WholeWheatFlour_Photo-adobe-stock_E.jpg?t=1590171823&width=1080'
    )
    p16 = Produce(
        name='Turnip',
        imageRef=
        'https://upload.wikimedia.org/wikipedia/commons/d/d3/Turnip_2622027.jpg'
    )
    p17 = Produce(
        name='Lettuce',
        imageRef=
        'https://i0.wp.com/post.healthline.com/wp-content/uploads/2020/03/romaine-lettuce-1296x728-body.jpg?w=1155&h=1528'
    )
    p18 = Produce(
        name='Green Peppers',
        imageRef=
        'https://edge.bonnieplants.com/www/tiny/uploads/20200810205434/bonnie-s-green-bell-pepper.jpg'
    )
    p19 = Produce(name='Chili Peppers',
                  imageRef='https://scitechdaily.com/images/Chili-Peppers.jpg')
    p20 = Produce(
        name='Cucumber',
        imageRef=
        'https://www.shethepeople.tv/wp-content/uploads/2019/05/cucumber-e1558166231577.jpg'
    )

    db.session.add_all([
        p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15, p16,
        p17, p18, p19, p20
    ])
    db.session.commit()

    pk1 = Producetokeyword(kproduct=Produce.query.get(1),
                           tag=Keyword.query.get(1))
    pk2 = Producetokeyword(kproduct=Produce.query.get(1),
                           tag=Keyword.query.get(9))
    pk3 = Producetokeyword(kproduct=Produce.query.get(1),
                           tag=Keyword.query.get(21))
    pk4 = Producetokeyword(kproduct=Produce.query.get(2),
                           tag=Keyword.query.get(1))
    pk5 = Producetokeyword(kproduct=Produce.query.get(2),
                           tag=Keyword.query.get(18))
    pk6 = Producetokeyword(kproduct=Produce.query.get(2),
                           tag=Keyword.query.get(9))
    pk7 = Producetokeyword(kproduct=Produce.query.get(3),
                           tag=Keyword.query.get(1))
    pk8 = Producetokeyword(kproduct=Produce.query.get(3),
                           tag=Keyword.query.get(18))
    pk9 = Producetokeyword(kproduct=Produce.query.get(3),
                           tag=Keyword.query.get(9))
    pk10 = Producetokeyword(kproduct=Produce.query.get(4),
                            tag=Keyword.query.get(6))
    pk11 = Producetokeyword(kproduct=Produce.query.get(4),
                            tag=Keyword.query.get(11))
    pk12 = Producetokeyword(kproduct=Produce.query.get(4),
                            tag=Keyword.query.get(12))
    pk13 = Producetokeyword(kproduct=Produce.query.get(4),
                            tag=Keyword.query.get(16))
    pk14 = Producetokeyword(kproduct=Produce.query.get(5),
                            tag=Keyword.query.get(2))
    pk15 = Producetokeyword(kproduct=Produce.query.get(5),
                            tag=Keyword.query.get(11))
    pk16 = Producetokeyword(kproduct=Produce.query.get(5),
                            tag=Keyword.query.get(12))
    pk17 = Producetokeyword(kproduct=Produce.query.get(5),
                            tag=Keyword.query.get(16))
    pk18 = Producetokeyword(kproduct=Produce.query.get(6),
                            tag=Keyword.query.get(1))
    pk19 = Producetokeyword(kproduct=Produce.query.get(6),
                            tag=Keyword.query.get(11))
    pk20 = Producetokeyword(kproduct=Produce.query.get(6),
                            tag=Keyword.query.get(12))
    pk21 = Producetokeyword(kproduct=Produce.query.get(6),
                            tag=Keyword.query.get(16))
    pk22 = Producetokeyword(kproduct=Produce.query.get(7),
                            tag=Keyword.query.get(1))
    pk23 = Producetokeyword(kproduct=Produce.query.get(7),
                            tag=Keyword.query.get(13))
    pk24 = Producetokeyword(kproduct=Produce.query.get(7),
                            tag=Keyword.query.get(9))
    pk25 = Producetokeyword(kproduct=Produce.query.get(8),
                            tag=Keyword.query.get(1))
    pk26 = Producetokeyword(kproduct=Produce.query.get(8),
                            tag=Keyword.query.get(2))
    pk27 = Producetokeyword(kproduct=Produce.query.get(8),
                            tag=Keyword.query.get(3))
    pk28 = Producetokeyword(kproduct=Produce.query.get(8),
                            tag=Keyword.query.get(4))
    pk29 = Producetokeyword(kproduct=Produce.query.get(8),
                            tag=Keyword.query.get(6))
    pk30 = Producetokeyword(kproduct=Produce.query.get(8),
                            tag=Keyword.query.get(7))
    pk31 = Producetokeyword(kproduct=Produce.query.get(8),
                            tag=Keyword.query.get(8))
    pk32 = Producetokeyword(kproduct=Produce.query.get(8),
                            tag=Keyword.query.get(13))
    pk33 = Producetokeyword(kproduct=Produce.query.get(8),
                            tag=Keyword.query.get(9))
    pk34 = Producetokeyword(kproduct=Produce.query.get(9),
                            tag=Keyword.query.get(1))
    pk35 = Producetokeyword(kproduct=Produce.query.get(9),
                            tag=Keyword.query.get(9))
    pk36 = Producetokeyword(kproduct=Produce.query.get(9),
                            tag=Keyword.query.get(13))
    pk37 = Producetokeyword(kproduct=Produce.query.get(9),
                            tag=Keyword.query.get(16))
    pk38 = Producetokeyword(kproduct=Produce.query.get(10),
                            tag=Keyword.query.get(4))
    pk39 = Producetokeyword(kproduct=Produce.query.get(10),
                            tag=Keyword.query.get(3))
    pk40 = Producetokeyword(kproduct=Produce.query.get(10),
                            tag=Keyword.query.get(2))
    pk41 = Producetokeyword(kproduct=Produce.query.get(10),
                            tag=Keyword.query.get(7))
    pk42 = Producetokeyword(kproduct=Produce.query.get(10),
                            tag=Keyword.query.get(10))
    pk43 = Producetokeyword(kproduct=Produce.query.get(10),
                            tag=Keyword.query.get(16))
    pk44 = Producetokeyword(kproduct=Produce.query.get(11),
                            tag=Keyword.query.get(2))
    pk45 = Producetokeyword(kproduct=Produce.query.get(11),
                            tag=Keyword.query.get(16))
    pk46 = Producetokeyword(kproduct=Produce.query.get(11),
                            tag=Keyword.query.get(20))
    pk47 = Producetokeyword(kproduct=Produce.query.get(11),
                            tag=Keyword.query.get(14))
    pk48 = Producetokeyword(kproduct=Produce.query.get(11),
                            tag=Keyword.query.get(19))
    pk49 = Producetokeyword(kproduct=Produce.query.get(11),
                            tag=Keyword.query.get(9))
    pk50 = Producetokeyword(kproduct=Produce.query.get(12),
                            tag=Keyword.query.get(9))
    pk51 = Producetokeyword(kproduct=Produce.query.get(12),
                            tag=Keyword.query.get(13))
    pk52 = Producetokeyword(kproduct=Produce.query.get(12),
                            tag=Keyword.query.get(1))
    pk53 = Producetokeyword(kproduct=Produce.query.get(13),
                            tag=Keyword.query.get(19))
    pk54 = Producetokeyword(kproduct=Produce.query.get(13),
                            tag=Keyword.query.get(20))
    pk55 = Producetokeyword(kproduct=Produce.query.get(13),
                            tag=Keyword.query.get(10))
    pk56 = Producetokeyword(kproduct=Produce.query.get(13),
                            tag=Keyword.query.get(7))
    pk57 = Producetokeyword(kproduct=Produce.query.get(14),
                            tag=Keyword.query.get(19))
    pk58 = Producetokeyword(kproduct=Produce.query.get(14),
                            tag=Keyword.query.get(20))
    pk59 = Producetokeyword(kproduct=Produce.query.get(14),
                            tag=Keyword.query.get(10))
    pk60 = Producetokeyword(kproduct=Produce.query.get(14),
                            tag=Keyword.query.get(7))
    pk61 = Producetokeyword(kproduct=Produce.query.get(15),
                            tag=Keyword.query.get(19))
    pk62 = Producetokeyword(kproduct=Produce.query.get(15),
                            tag=Keyword.query.get(20))
    pk63 = Producetokeyword(kproduct=Produce.query.get(15),
                            tag=Keyword.query.get(10))
    pk64 = Producetokeyword(kproduct=Produce.query.get(15),
                            tag=Keyword.query.get(4))
    pk65 = Producetokeyword(kproduct=Produce.query.get(16),
                            tag=Keyword.query.get(17))
    pk66 = Producetokeyword(kproduct=Produce.query.get(16),
                            tag=Keyword.query.get(3))
    pk67 = Producetokeyword(kproduct=Produce.query.get(16),
                            tag=Keyword.query.get(4))
    pk68 = Producetokeyword(kproduct=Produce.query.get(16),
                            tag=Keyword.query.get(9))
    pk69 = Producetokeyword(kproduct=Produce.query.get(16),
                            tag=Keyword.query.get(16))
    pk70 = Producetokeyword(kproduct=Produce.query.get(17),
                            tag=Keyword.query.get(1))
    pk71 = Producetokeyword(kproduct=Produce.query.get(17),
                            tag=Keyword.query.get(9))
    pk72 = Producetokeyword(kproduct=Produce.query.get(17),
                            tag=Keyword.query.get(18))
    pk73 = Producetokeyword(kproduct=Produce.query.get(18),
                            tag=Keyword.query.get(1))
    pk74 = Producetokeyword(kproduct=Produce.query.get(18),
                            tag=Keyword.query.get(16))
    pk75 = Producetokeyword(kproduct=Produce.query.get(18),
                            tag=Keyword.query.get(9))
    pk76 = Producetokeyword(kproduct=Produce.query.get(19),
                            tag=Keyword.query.get(15))
    pk77 = Producetokeyword(kproduct=Produce.query.get(19),
                            tag=Keyword.query.get(16))
    pk78 = Producetokeyword(kproduct=Produce.query.get(19),
                            tag=Keyword.query.get(3))
    pk79 = Producetokeyword(kproduct=Produce.query.get(19),
                            tag=Keyword.query.get(9))
    pk80 = Producetokeyword(kproduct=Produce.query.get(20),
                            tag=Keyword.query.get(1))
    pk81 = Producetokeyword(kproduct=Produce.query.get(20),
                            tag=Keyword.query.get(16))
    pk82 = Producetokeyword(kproduct=Produce.query.get(20),
                            tag=Keyword.query.get(9))
    pk83 = Producetokeyword(kproduct=Produce.query.get(20),
                            tag=Keyword.query.get(12))
    pk84 = Producetokeyword(kproduct=Produce.query.get(20),
                            tag=Keyword.query.get(11))

    db.session.add_all([
        pk1, pk2, pk3, pk4, pk5, pk6, pk7, pk8, pk9, pk10, pk11, pk12, pk13,
        pk14, pk15, pk16, pk17, pk18, pk19, pk20, pk21, pk22, pk23, pk24, pk25,
        pk26, pk27, pk28, pk29, pk30, pk31, pk32, pk33, pk34, pk35, pk36, pk37,
        pk38, pk39, pk40, pk41, pk42, pk43, pk44, pk45, pk46, pk47, pk48, pk49,
        pk50, pk51, pk52, pk53, pk54, pk55, pk56, pk57, pk58, pk59, pk60, pk61,
        pk62, pk63, pk64, pk65, pk66, pk67, pk68, pk69, pk70, pk71, pk72, pk73,
        pk74, pk75, pk76, pk77, pk78, pk79, pk80, pk81, pk82, pk83, pk84
    ])
    db.session.commit()

    l1 = Listing(price=2.56,
                 quantity=551,
                 lproduct=Produce.query.get(2),
                 owner=Supplier.query.get(16))
    l2 = Listing(price=3.27,
                 quantity=1059,
                 lproduct=Produce.query.get(1),
                 owner=Supplier.query.get(4))
    l3 = Listing(price=0.59,
                 quantity=710,
                 lproduct=Produce.query.get(2),
                 owner=Supplier.query.get(15))
    l4 = Listing(price=2.59,
                 quantity=535,
                 lproduct=Produce.query.get(9),
                 owner=Supplier.query.get(10))
    l5 = Listing(price=2.04,
                 quantity=682,
                 lproduct=Produce.query.get(6),
                 owner=Supplier.query.get(3))
    l6 = Listing(price=3.3,
                 quantity=1254,
                 lproduct=Produce.query.get(6),
                 owner=Supplier.query.get(7))
    l7 = Listing(price=3.19,
                 quantity=612,
                 lproduct=Produce.query.get(10),
                 owner=Supplier.query.get(5))
    l8 = Listing(price=1.22,
                 quantity=748,
                 lproduct=Produce.query.get(14),
                 owner=Supplier.query.get(4))
    l9 = Listing(price=1.83,
                 quantity=1236,
                 lproduct=Produce.query.get(15),
                 owner=Supplier.query.get(10))

    l11 = Listing(price=2.38,
                  quantity=460,
                  lproduct=Produce.query.get(6),
                  owner=Supplier.query.get(15))
    l12 = Listing(price=3.02,
                  quantity=588,
                  lproduct=Produce.query.get(16),
                  owner=Supplier.query.get(19))
    l13 = Listing(price=2.29,
                  quantity=231,
                  lproduct=Produce.query.get(10),
                  owner=Supplier.query.get(14))
    l14 = Listing(price=1.84,
                  quantity=717,
                  lproduct=Produce.query.get(17),
                  owner=Supplier.query.get(1))

    l16 = Listing(price=0.75,
                  quantity=709,
                  lproduct=Produce.query.get(14),
                  owner=Supplier.query.get(18))
    l17 = Listing(price=0.51,
                  quantity=826,
                  lproduct=Produce.query.get(16),
                  owner=Supplier.query.get(4))
    l18 = Listing(price=1.32,
                  quantity=623,
                  lproduct=Produce.query.get(13),
                  owner=Supplier.query.get(17))

    l20 = Listing(price=1.19,
                  quantity=996,
                  lproduct=Produce.query.get(2),
                  owner=Supplier.query.get(7))
    l21 = Listing(price=1.73,
                  quantity=931,
                  lproduct=Produce.query.get(14),
                  owner=Supplier.query.get(5))
    l22 = Listing(price=0.81,
                  quantity=166,
                  lproduct=Produce.query.get(8),
                  owner=Supplier.query.get(8))
    l23 = Listing(price=2.68,
                  quantity=204,
                  lproduct=Produce.query.get(3),
                  owner=Supplier.query.get(12))
    l24 = Listing(price=3.02,
                  quantity=615,
                  lproduct=Produce.query.get(9),
                  owner=Supplier.query.get(9))
    l25 = Listing(price=1.73,
                  quantity=832,
                  lproduct=Produce.query.get(4),
                  owner=Supplier.query.get(13))
    l26 = Listing(price=1.53,
                  quantity=181,
                  lproduct=Produce.query.get(6),
                  owner=Supplier.query.get(1))
    l27 = Listing(price=0.79,
                  quantity=769,
                  lproduct=Produce.query.get(15),
                  owner=Supplier.query.get(13))

    l29 = Listing(price=3.28,
                  quantity=962,
                  lproduct=Produce.query.get(16),
                  owner=Supplier.query.get(11))
    l30 = Listing(price=1.1,
                  quantity=1198,
                  lproduct=Produce.query.get(20),
                  owner=Supplier.query.get(6))
    l31 = Listing(price=2.44,
                  quantity=259,
                  lproduct=Produce.query.get(10),
                  owner=Supplier.query.get(15))
    l32 = Listing(price=3.04,
                  quantity=180,
                  lproduct=Produce.query.get(16),
                  owner=Supplier.query.get(5))
    l33 = Listing(price=1.8,
                  quantity=330,
                  lproduct=Produce.query.get(1),
                  owner=Supplier.query.get(17))
    l34 = Listing(price=1.94,
                  quantity=353,
                  lproduct=Produce.query.get(1),
                  owner=Supplier.query.get(18))
    l35 = Listing(price=3.22,
                  quantity=890,
                  lproduct=Produce.query.get(11),
                  owner=Supplier.query.get(6))

    l38 = Listing(price=0.87,
                  quantity=1057,
                  lproduct=Produce.query.get(15),
                  owner=Supplier.query.get(15))
    l39 = Listing(price=1.64,
                  quantity=956,
                  lproduct=Produce.query.get(6),
                  owner=Supplier.query.get(5))
    l40 = Listing(price=3.28,
                  quantity=670,
                  lproduct=Produce.query.get(17),
                  owner=Supplier.query.get(6))
    l41 = Listing(price=1.92,
                  quantity=567,
                  lproduct=Produce.query.get(12),
                  owner=Supplier.query.get(1))
    l42 = Listing(price=0.68,
                  quantity=1145,
                  lproduct=Produce.query.get(19),
                  owner=Supplier.query.get(11))
    l43 = Listing(price=2.71,
                  quantity=885,
                  lproduct=Produce.query.get(18),
                  owner=Supplier.query.get(9))
    l44 = Listing(price=0.69,
                  quantity=190,
                  lproduct=Produce.query.get(15),
                  owner=Supplier.query.get(17))
    l45 = Listing(price=2,
                  quantity=1109,
                  lproduct=Produce.query.get(5),
                  owner=Supplier.query.get(2))
    l46 = Listing(price=1.63,
                  quantity=354,
                  lproduct=Produce.query.get(7),
                  owner=Supplier.query.get(13))

    l48 = Listing(price=1.67,
                  quantity=763,
                  lproduct=Produce.query.get(20),
                  owner=Supplier.query.get(5))
    l49 = Listing(price=2.21,
                  quantity=716,
                  lproduct=Produce.query.get(4),
                  owner=Supplier.query.get(19))
    l50 = Listing(price=2.47,
                  quantity=383,
                  lproduct=Produce.query.get(13),
                  owner=Supplier.query.get(8))
    l51 = Listing(price=3.11,
                  quantity=1229,
                  lproduct=Produce.query.get(17),
                  owner=Supplier.query.get(9))
    l52 = Listing(price=2.79,
                  quantity=911,
                  lproduct=Produce.query.get(16),
                  owner=Supplier.query.get(16))
    l53 = Listing(price=2.24,
                  quantity=635,
                  lproduct=Produce.query.get(3),
                  owner=Supplier.query.get(18))
    l54 = Listing(price=2.08,
                  quantity=999,
                  lproduct=Produce.query.get(7),
                  owner=Supplier.query.get(10))
    l55 = Listing(price=2.87,
                  quantity=896,
                  lproduct=Produce.query.get(8),
                  owner=Supplier.query.get(16))
    l56 = Listing(price=0.67,
                  quantity=251,
                  lproduct=Produce.query.get(16),
                  owner=Supplier.query.get(7))
    l57 = Listing(price=2.41,
                  quantity=508,
                  lproduct=Produce.query.get(12),
                  owner=Supplier.query.get(19))
    l58 = Listing(price=3.1,
                  quantity=165,
                  lproduct=Produce.query.get(14),
                  owner=Supplier.query.get(11))
    l59 = Listing(price=3.2,
                  quantity=1068,
                  lproduct=Produce.query.get(19),
                  owner=Supplier.query.get(19))
    l60 = Listing(price=2.98,
                  quantity=846,
                  lproduct=Produce.query.get(19),
                  owner=Supplier.query.get(12))
    l61 = Listing(price=1.91,
                  quantity=481,
                  lproduct=Produce.query.get(1),
                  owner=Supplier.query.get(10))
    l62 = Listing(price=2.37,
                  quantity=1165,
                  lproduct=Produce.query.get(18),
                  owner=Supplier.query.get(12))
    l63 = Listing(price=1.18,
                  quantity=634,
                  lproduct=Produce.query.get(1),
                  owner=Supplier.query.get(16))

    l65 = Listing(price=1.46,
                  quantity=1143,
                  lproduct=Produce.query.get(6),
                  owner=Supplier.query.get(13))
    l66 = Listing(price=1.38,
                  quantity=491,
                  lproduct=Produce.query.get(20),
                  owner=Supplier.query.get(11))
    l67 = Listing(price=0.69,
                  quantity=331,
                  lproduct=Produce.query.get(14),
                  owner=Supplier.query.get(9))
    l68 = Listing(price=3.46,
                  quantity=809,
                  lproduct=Produce.query.get(11),
                  owner=Supplier.query.get(14))

    l70 = Listing(price=3.2,
                  quantity=1083,
                  lproduct=Produce.query.get(5),
                  owner=Supplier.query.get(3))
    l71 = Listing(price=1.21,
                  quantity=318,
                  lproduct=Produce.query.get(14),
                  owner=Supplier.query.get(1))
    l72 = Listing(price=2.29,
                  quantity=544,
                  lproduct=Produce.query.get(6),
                  owner=Supplier.query.get(9))
    l73 = Listing(price=2.11,
                  quantity=207,
                  lproduct=Produce.query.get(6),
                  owner=Supplier.query.get(17))
    l74 = Listing(price=0.64,
                  quantity=882,
                  lproduct=Produce.query.get(9),
                  owner=Supplier.query.get(15))
    l75 = Listing(price=2.59,
                  quantity=185,
                  lproduct=Produce.query.get(18),
                  owner=Supplier.query.get(16))
    l76 = Listing(price=1.57,
                  quantity=1143,
                  lproduct=Produce.query.get(18),
                  owner=Supplier.query.get(10))

    l78 = Listing(price=1.7,
                  quantity=934,
                  lproduct=Produce.query.get(6),
                  owner=Supplier.query.get(10))
    l79 = Listing(price=0.65,
                  quantity=501,
                  lproduct=Produce.query.get(11),
                  owner=Supplier.query.get(19))
    l80 = Listing(price=1.84,
                  quantity=741,
                  lproduct=Produce.query.get(3),
                  owner=Supplier.query.get(10))

    l83 = Listing(price=3.31,
                  quantity=784,
                  lproduct=Produce.query.get(9),
                  owner=Supplier.query.get(14))
    l84 = Listing(price=0.57,
                  quantity=106,
                  lproduct=Produce.query.get(17),
                  owner=Supplier.query.get(8))
    l85 = Listing(price=2.99,
                  quantity=1225,
                  lproduct=Produce.query.get(19),
                  owner=Supplier.query.get(7))
    l86 = Listing(price=1.13,
                  quantity=865,
                  lproduct=Produce.query.get(6),
                  owner=Supplier.query.get(19))
    l87 = Listing(price=1.91,
                  quantity=560,
                  lproduct=Produce.query.get(3),
                  owner=Supplier.query.get(19))
    l88 = Listing(price=3.45,
                  quantity=883,
                  lproduct=Produce.query.get(17),
                  owner=Supplier.query.get(16))
    l89 = Listing(price=2.34,
                  quantity=326,
                  lproduct=Produce.query.get(15),
                  owner=Supplier.query.get(19))
    l90 = Listing(price=2.45,
                  quantity=110,
                  lproduct=Produce.query.get(19),
                  owner=Supplier.query.get(5))
    l91 = Listing(price=1.49,
                  quantity=230,
                  lproduct=Produce.query.get(19),
                  owner=Supplier.query.get(6))
    l92 = Listing(price=2.61,
                  quantity=889,
                  lproduct=Produce.query.get(19),
                  owner=Supplier.query.get(2))
    l93 = Listing(price=0.72,
                  quantity=436,
                  lproduct=Produce.query.get(10),
                  owner=Supplier.query.get(7))
    l94 = Listing(price=1.58,
                  quantity=1231,
                  lproduct=Produce.query.get(10),
                  owner=Supplier.query.get(10))

    l96 = Listing(price=1.19,
                  quantity=582,
                  lproduct=Produce.query.get(12),
                  owner=Supplier.query.get(6))
    l97 = Listing(price=3.4,
                  quantity=972,
                  lproduct=Produce.query.get(3),
                  owner=Supplier.query.get(14))
    l98 = Listing(price=1.95,
                  quantity=1260,
                  lproduct=Produce.query.get(13),
                  owner=Supplier.query.get(15))

    l100 = Listing(price=1.81,
                   quantity=226,
                   lproduct=Produce.query.get(7),
                   owner=Supplier.query.get(12))

    db.session.add_all([
        l1, l2, l3, l4, l5, l6, l7, l8, l9, l11, l12, l13, l14, l16, l17, l18,
        l20, l21, l22, l23, l24, l25, l26, l27, l29, l30, l31, l32, l33, l34,
        l35, l38, l39, l40, l41, l42, l43, l44, l45, l46, l48, l49, l50, l51,
        l52, l53, l54, l55, l56, l57, l58, l59, l60, l61, l62, l63, l65, l66,
        l67, l68, l70, l71, l72, l73, l74, l75, l76, l78, l79, l80, l83, l84,
        l85, l86, l87, l88, l89, l90, l91, l92, l93, l94, l96, l97, l98, l100
    ])
    db.session.commit()

    return "Database has been populated."
Beispiel #19
0
def analyze_keyword(k):
    """
    Analyze a site-level keyword: fetch its Baidu index and Baidu result
    count from 5118.com and store the related long-tail keywords.

    Nothing is fetched when the keyword is unknown or when it already has a
    positive Baidu index or result count.  The function sleeps for a random
    5-15 seconds after processing the page.
    """
    keyword = Keyword.find_one({'name': k})
    if not keyword:
        print('Keyword %s does not exist' % k)
        return
    already_imported = keyword.baiduIndex > 0 or keyword.baiduResult > 0
    if already_imported:
        print('Keyword %s is imported before' % k)
        return
    print('Try to analyze keyword %s/%s' % (keyword._id, k))

    ss.headers['Referer'] = 'http://www.5118.com/'
    page = ss.get('http://www.5118.com/seo/words/%s' % url_quote(k)).text
    rows = html.fromstring(page).xpath(
        '//div[@class="Fn-ui-list dig-list"]/dl')
    row_count = len(rows)
    for row in rows:
        # Rows whose class is exactly 'dl-word' carry no keyword data.
        if row.get('class', '') == 'dl-word':
            continue
        name = unicode(row.xpath('./dd[1]//a[1]/@title')[0].strip())
        index_text = row.xpath('./dd[2]/text()')[0].strip()
        result_text = row.xpath('./dd[3]/text()')[0].strip()
        # Non-numeric cells count as zero; numeric ones stay as text until
        # they are stored, so the log line shows the scraped value verbatim.
        baidu_index = index_text if index_text.isdigit() else 0
        baidu_result = result_text if result_text.isdigit() else 0
        print('Found keyword: %s/%s/%s' % (name, baidu_index, baidu_result))

        if name == k:
            # The row describing the analyzed keyword itself.
            keyword.baiduIndex = int(baidu_index)
            keyword.baiduResult = int(baidu_result)
            if row_count > 2:
                keyword.total = row_count - 2
            keyword.save()
            continue

        if Keyword.count({'name': name}) > 0:
            print('This keyword already exists')
            continue

        # Any other row is a related keyword, stored as a long-tail child.
        long_tail = Keyword()
        long_tail.name = name
        long_tail.level = KeywordLevel.LONG_TAIL
        long_tail.parentId = keyword._id
        long_tail.baiduIndex = int(baidu_index)
        long_tail.baiduResult = int(baidu_result)
        long_tail.save()

    time.sleep(random.randint(5, 15))
Beispiel #20
0
def recommend():
    """Build and render the news recommendations shown in view 3.

    The current user's keywords are read from ``Keyword`` rows, de-duplicated
    and stripped of punctuation-only tokens, then handed to
    ``rec_sys_routes`` to produce a recommendations table.  Every
    recommendation with a positive ``NN_counts`` score is matched to a
    ``News`` row by title and turned into a display tuple.

    Returns:
        The rendered ``recommend.html`` template.  Its ``recommended``
        argument is a list of
        ``(link, title, summary, outlet, language, query, pic_link)`` tuples
        ordered from highest to lowest score.  Recommendations whose title
        has no matching ``News`` row are skipped.
    """
    rec_sys_routes.news_panda()

    keywords = Keyword.query.filter(Keyword.id_user == current_user.id)
    raw_words = []
    # NOTE(review): every row overwrites the previous one, so only the last
    # Keyword row's body is used -- confirm whether rows should be merged.
    for item in keywords:
        raw_words = item.body.split(' ')

    # De-duplicate and drop tokens that are punctuation or empty.
    noise_tokens = {',', '.', ':', ';', ' ', '', '/'}
    user_key_words = [
        word for word in set(raw_words) if word not in noise_tokens
    ]

    date = "20200515"
    rec_sys_routes.create_key_word_user_db(user_key_words, date)

    # Builds the recommendations for these keywords and this date.
    user = rec_sys_routes.User_recommend(user_key_words,
                                         date,
                                         w2v_model="news_w2v")
    recommendations = user.recommendations

    titles = recommendations['title'].tolist()
    scores = recommendations['NN_counts'].tolist()
    scored_titles = [(title, score) for title, score in zip(titles, scores)
                     if int(score) > 0]
    # Stable ascending sort, walked backwards below: best score first, with
    # ties in the same order as a build-then-reverse would give.
    scored_titles.sort(key=lambda pair: pair[1])

    outlet_names = {
        'theguardian': 'The Guardian',
        'Ger': 'Der Spiegel',
        'Spa': 'El Pais',
    }
    default_pic = "https://static.bbc.co.uk/news/1.312.03569/img/brand/generated/news-light.png"

    recommended_list = []
    for rec_title, _score in reversed(scored_titles):
        new = News.query.filter(News.title == rec_title)
        # Run the query once instead of once per attribute.
        article = new.first()
        if article is None:
            continue

        title = article.title
        summary = article.summary
        if len(summary) > 300:
            summary = summary[0:300] + '...'
        ## to deal with cartoons ##
        elif 'cartoon' in title:
            title = title.replace('- cartoon', '')
            summary = f'Cartoon: {title}'

        outlet = article.outlet
        if outlet == 'theguardian':
            # Guardian items show the title in place of the summary.
            summary = title
        outlet = outlet_names.get(outlet, outlet)

        pic_link = article.pic_link
        if pic_link is None:
            pic_link = default_pic

        # The query object itself is passed through as the sixth element.
        recommended_list.append((article.link, title, summary, outlet,
                                 article.language, new, pic_link))

    return render_template('recommend.html', recommended=recommended_list)
Beispiel #21
0
    def test_classifier(self):
        """Classify https://www.uol.com.br/ and expect a "permitted" verdict.

        Builds the default keyword sets for the five restricted labels plus
        one unrestricted label with no keywords, runs
        ``self.classifier.classify`` over them, and asserts that the last
        status it produces carries the label "permitted".
        """
        default_req = [Request(id=0, url=' ', status='done')]
        placeholder = default_req[0]

        def keywords_for(*words):
            # Each keyword gets its own list holding the shared request.
            return [Keyword(word=word, requests=[placeholder])
                    for word in words]

        default_kw_armas = keywords_for('arma', 'munição', 'calibre',
                                        'revólver', 'cano', 'carabina',
                                        'espingarda')
        default_kw_cigarros = keywords_for('cigarro', 'vape', 'narguile',
                                           'fumar', 'tragar', 'tabaco',
                                           'nicotina', 'vaporizador', 'ervas')
        default_kw_prost = keywords_for('sexo', 'prostituta', 'fetiche',
                                        'cache', 'acompanhante', 'programa',
                                        'seios', 'bunda', 'travesti',
                                        'gostosa')
        default_kw_remedio = keywords_for('remédio', 'aborto', 'comprimido',
                                          'secundários', 'efeitos')
        default_kw_serv = keywords_for('operadora', 'cabo', 'assinatura',
                                       'liberação', 'sem', 'aparelhos', 'net',
                                       'vivo')

        restricted = [
            ('Armas de fogo', default_kw_armas),
            ('Cigarros', default_kw_cigarros),
            ('Prostutuição', default_kw_prost),
            ('Remédios', default_kw_remedio),
            ('Serviços ilegais', default_kw_serv),
        ]
        labels = [
            Label(name=label_name, restrict=True, keywords=label_kws)
            for label_name, label_kws in restricted
        ]
        labels.append(Label(name='Site permitido', restrict=False,
                            keywords=[]))

        kws = (default_kw_armas + default_kw_cigarros + default_kw_prost +
               default_kw_remedio + default_kw_serv)

        # Collect every status produced; only the last one is asserted on.
        stat = list(
            self.classifier.classify(url='https://www.uol.com.br/',
                                     kws=kws,
                                     labels=labels))
        self.assertEqual(stat[-1]['label'], "permitted")
Beispiel #22
0
"""
Populates the system's database tables with pre-defined Keywords and Labels to be used in the classifier.
"""

# Start from empty tables so the script can be re-run.
try:
    db.session.query(Request).delete()
    db.session.query(Keyword).delete()
    db.session.query(Label).delete()
    db.session.commit()
except Exception:
    # Best-effort cleanup: the deletes fail when the tables have not been
    # created yet.  Roll back so the failed transaction does not block the
    # statements that follow.
    db.session.rollback()
    print("tried to clean tables, but they don't yet exist")

# Single placeholder request attached to every default keyword.
default_req = [Request(id=0, url=' ', status='done')]

# Keywords for the firearms category (Portuguese terms).
default_kw_armas = [
    Keyword(word='arma', requests=[default_req[0]]),
    Keyword(word='munição', requests=[default_req[0]]),
    Keyword(word='calibre', requests=[default_req[0]]),
    Keyword(word='revólver', requests=[default_req[0]]),
    Keyword(word='cano', requests=[default_req[0]]),
    Keyword(word='carabina', requests=[default_req[0]]),
    Keyword(word='espingarda', requests=[default_req[0]])
]

default_kw_cigarros = [
    Keyword(word='cigarro', requests=[default_req[0]]),
    Keyword(word='vape', requests=[default_req[0]]),
    Keyword(word='narguile', requests=[default_req[0]]),
    Keyword(word='fumar', requests=[default_req[0]]),
    Keyword(word='tragar', requests=[default_req[0]]),
    Keyword(word='tabaco', requests=[default_req[0]]),
def _tmdb_json(url):
    """Fetch ``url`` and return its decoded JSON body, or None unless the status is 200."""
    response = requests.get(url)
    return response.json() if response.status_code == 200 else None


def _get_or_create(model, name, **extra):
    """Return the ``model`` row with this name, inserting and flushing it first when missing."""
    row = model.query.filter_by(name=name).first()
    if row is None:
        row = model(name=name, **extra)
        db.session.add(row)
        # Flush and refresh so the generated primary key is available.
        db.session.flush()
        db.session.refresh(row)
    return row


def seed():
    """Fill every empty table with its seed data.

    Each section runs only when its table has no rows, and commits its own
    work so it persists even when the later sections are skipped:

    * users: 610 accounts with random e-mails and the password "123456";
    * ratings: one row per line of ``dataset/ratings.csv``;
    * movies: one row per line of ``dataset/movies.csv``, enriched with
      details, credits, keywords, release dates and videos fetched from the
      TMDB API, committed movie by movie;
    * action types and action parameters: loaded from the JSON files under
      ``seed/``.
    """
    if User.query.first() is None:
        for i in range(1, 611):
            email = randomEmail()
            u = User(id=i, email=email)
            u.hash_password("123456")
            db.session.add(u)
        db.session.commit()

    if Rating.query.first() is None:
        rt = pd.read_csv("dataset/ratings.csv")
        # NOTE(review): every rating gets this one fixed timestamp; confirm
        # that a per-row timestamp is not wanted here.
        rated_at = datetime.datetime.fromtimestamp(1347517370)

        for _, row in rt.iterrows():
            db.session.add(
                Rating(rating=int(round(row['rating'])),
                       timestamp=rated_at,
                       user_id=row['userId'],
                       movie_id=row['movieId']))
        db.session.commit()

    if Movie.query.first() is None:
        mv = pd.read_csv("dataset/movies.csv")
        links = pd.read_csv("dataset/links.csv")
        api_key = "?api_key=" + app.config['API_KEY']

        for index, row in mv.iterrows():
            # links.csv is read positionally: row N pairs with movies.csv row N.
            tmdb_id = links.loc[index, 'tmdbId']
            movie_id = row['movieId']
            base_url = "https://api.themoviedb.org/3/movie/" + str(tmdb_id)

            res = _tmdb_json(base_url + api_key)
            credits_res = _tmdb_json(base_url + "/credits" + api_key)
            keywords_res = _tmdb_json(base_url + "/keywords" + api_key)
            releases = _tmdb_json(base_url + "/release_dates" + api_key)
            videos_res = _tmdb_json(base_url + "/videos" + api_key)

            keywords = keywords_res['keywords'] if keywords_res is not None else []
            videos = videos_res['results'] if videos_res is not None else []
            casts = credits_res['cast'] if credits_res is not None else []
            crews = credits_res['crew'] if credits_res is not None else []

            # A null image path would otherwise be stored as IMG_URL + "None";
            # use the configured default image instead.
            if res is not None and res['poster_path']:
                poster_path = app.config['IMG_URL'] + str(res['poster_path'])
            else:
                poster_path = app.config['IMG_DEFAULT']
            if res is not None and res['backdrop_path']:
                backdrop_path = app.config['IMG_URL'] + \
                    str(res['backdrop_path'])
            else:
                backdrop_path = app.config['BACKDROP_DEFAULT']

            # Fall back to neutral values when the details request failed.
            original_title = res['original_title'] if res is not None else row[
                'title']
            vote_average = res['vote_average'] if res is not None else 0
            vote_count = res['vote_count'] if res is not None else 0
            runtime = res['runtime'] if res is not None else 0
            genres = res['genres'] if res is not None else []
            release_date = res['release_date'] if res is not None else '2000'
            overview = res['overview'] if res is not None else ''

            # Use the first listed release's certification, else "G".
            certification = "G"
            if releases is not None and releases['results']:
                release_dates = releases['results'][0]['release_dates']
                if release_dates:
                    certification = release_dates[0]['certification']

            # Average of the ratings already stored for this movie.
            rating_list = Rating.query.filter_by(movie_id=movie_id).all()
            if rating_list:
                avg = sum(r.rating for r in rating_list) / len(rating_list)
            else:
                avg = 0

            m = Movie(id=movie_id,
                      title=row['title'],
                      original_title=original_title,
                      tmdb_id=tmdb_id,
                      rating=avg,
                      backdrop_path=backdrop_path,
                      poster_path=poster_path,
                      release_date=release_date,
                      runtime=runtime,
                      overview=overview,
                      vote_average=vote_average,
                      vote_count=vote_count,
                      certification=certification)
            db.session.add(m)
            db.session.flush()
            db.session.refresh(m)

            for crew in crews:
                crew_row = _get_or_create(Crew, crew['name'])
                db.session.add(
                    CreditCrews(movie_id=m.id,
                                crew_id=crew_row.id,
                                department=crew['department']))

            for cast in casts:
                image = app.config['IMG_URL'] + str(cast['profile_path'])
                cast_row = _get_or_create(Cast, cast['name'], image=image)
                db.session.add(
                    CreditCasts(character=cast['character'],
                                movie_id=m.id,
                                cast_id=cast_row.id,
                                order=cast['order']))

            for video in videos:
                db.session.add(
                    Video(key=video['key'],
                          name=video['name'],
                          movie_id=m.id))

            for keyword in keywords:
                keyword_row = _get_or_create(Keyword, keyword['name'])
                db.session.add(
                    MovieKeywords(movie_id=m.id, keyword_id=keyword_row.id))

            for genre in genres:
                genre_row = _get_or_create(Genre, genre['name'])
                db.session.add(
                    MovieGenres(movie_id=m.id, genre_id=genre_row.id))

            # Commit per movie so an interrupted run keeps finished movies.
            db.session.commit()

    if ActionType.query.first() is None:
        with open('seed/ActionType.json') as json_file:
            data = json.load(json_file)

            for action in data:
                action_type = ActionType(id=action['id'],
                                         name=action['name'],
                                         description=action['description'])
                db.session.add(action_type)

        db.session.commit()

    if ActionParameter.query.first() is None:
        with open('seed/ActionParameter.json') as json_file:
            data = json.load(json_file)

            for action in data:
                action_parameter = ActionParameter(id=action['id'],
                                                   name=action['name'])
                db.session.add(action_parameter)

        db.session.commit()