Exemple #1
0
def getsettings():
    """Return all ``scraper_setting`` rows rendered as HTML table rows.

    Rows are read in ``id desc`` order. Each row gets an ``active_status``
    label ("inactive" when its ``active`` column equals 0, otherwise
    "active") and is formatted into one ``<tr>`` fragment.

    Returns:
        The ``jsonify`` result for ``{"html": <all fragments concatenated>}``.
    """
    row_template = '''
        <tr>
            <td>%s</td>
            <td>%s<br><a id="switch" href="#" data-id="%s"
                                                    data-active="%s">Switch</a></td>
            <td>%s</td>
            <td>
                <a href="/deleteconfig?id=%s">delete</a>
                <a href="/editconfig?id=%s">edit</a>
            </td>
            <td>%s</td>

        </tr>
        '''
    db = DB()
    fragments = []
    for setting in db._select2dic("scraper_setting", order="id desc"):
        setting['active_status'] = "inactive" if setting['active'] == 0 else "active"
        fragments.append(row_template % (
            setting['source'], setting['active_status'], setting['id'], setting['active'],
            setting['spider_ip'], setting['id'], setting['id'], setting['url']))
    return jsonify({"html": ''.join(fragments)})
Exemple #2
0
def serverstatus():
    """Render ``serverstatus.html`` with per-config and per-spider status.

    Only ``scraper_setting`` rows with a non-empty ``spider_ip`` and a
    non-empty ``spider_status`` are read. For each row:

    * a positive ``last_full_scan_time`` is replaced by its
      ``'%Y-%m-%d %H:%M:%S'`` rendering (via ``datetime.fromtimestamp``);
    * a ``keyword`` entry is derived from the row's ``url``.

    ``config_dict`` groups rows by ``spider_ip``: keywords are collected and
    joined with ``', '``, while status and scan time are taken from the last
    row seen for that IP.

    Returns:
        The ``render_template`` result, given ``configs`` (the row list) and
        ``config_dict`` (the per-IP summary).
    """
    db = DB()
    configs = []
    config_dict = {}
    for each in db._select2dic("scraper_setting",
                               where="spider_ip is not null and spider_ip != '' and spider_status != ''"):
        scan_time = each['last_full_scan_time']
        # A NULL column arrives as None, which cannot be compared with 0 on
        # Python 3; leave such values untouched instead of raising.
        if scan_time and scan_time > 0:
            each['last_full_scan_time'] = datetime.datetime.fromtimestamp(scan_time).strftime(
                '%Y-%m-%d %H:%M:%S')

        keyword = _keyword_from_url(each['url'])
        each['keyword'] = keyword
        configs.append(each)

        summary = config_dict.setdefault(each['spider_ip'], {"keyword": []})
        summary['keyword'].append(keyword)
        summary['spider_status'] = each['spider_status']
        summary['last_full_scan_time'] = each['last_full_scan_time']

    for summary in config_dict.values():
        summary['keyword'] = ', '.join(summary['keyword'])

    return render_template("serverstatus.html", configs=configs, config_dict=config_dict)


def _keyword_from_url(url):
    """Extract the search keyword embedded in a scraper target URL.

    * URLs containing ``craigslist``: the ``query`` parameter, cut at the
      first ``-``.
    * URLs containing ``kijiji``: the fourth ``/``-separated piece of the path.
    * Any other URL: the ``_nkw`` query parameter.

    Returns:
        The stripped keyword, or ``""`` when the URL is empty or does not
        carry the expected parameter/path piece (one malformed URL must not
        break the whole status page).
    """
    if not url:
        return ""
    parsed = urlparse(url)
    if 'craigslist' in url:
        values = parse_qs(parsed.query).get('query')
        return values[0].split('-')[0].strip() if values else ""
    if 'kijiji' in url:
        pieces = parsed.path.split('/')
        return pieces[3].strip() if len(pieces) > 3 else ""
    values = parse_qs(parsed.query).get('_nkw')
    return values[0].strip() if values else ""
Exemple #3
0
 def get_keywords():
     """Return the non-empty ``keyword`` values of ``scraper_craigslist``.

     The table is queried grouped by ``keyword`` selecting only that column;
     rows whose keyword is falsy (NULL or empty) are skipped.
     """
     db = DB()
     rows = db._select2dic("scraper_craigslist", group="keyword", what=["keyword"])
     return [row["keyword"] for row in rows if row["keyword"]]
Exemple #4
0
def configlist():
    """Render ``configlist.html`` with every ``scraper_setting`` row.

    Rows are read in ``id desc`` order and each one is given an
    ``active_status`` label: "inactive" when ``active`` equals 0, otherwise
    "active".
    """
    db = DB()
    configs = []
    for setting in db._select2dic("scraper_setting", order="id desc"):
        setting['active_status'] = "inactive" if setting['active'] == 0 else "active"
        configs.append(setting)
    return render_template("configlist.html", configs=configs)
Exemple #5
0
def editconfig():
    """Edit the ``scraper_setting`` row named by the ``id`` query argument.

    * Missing/zero ``id`` or unknown row: redirect to ``configlist``.
    * Valid form submission: update ``source``, ``url`` and ``active`` (and
      blank ``spider_ip`` when the new ``active`` value is 0), commit, then
      redirect to ``configlist``.
    * Otherwise: flash the form errors, fill the form from the stored row
      and render ``addconfig.html``.
    """
    config_id = request.args.get("id", type=int)
    if not config_id:
        return redirect(url_for("configlist"))

    db = DB()
    try:
        stored = six.next(db._select2dic("scraper_setting", where="id = ?", where_values=[config_id]))
        form = AddConfigForm()

        if form.validate_on_submit():
            active = form.active.data
            changes = {"source": form.source.data, "url": form.url.data, "active": active}
            if int(active) == 0:
                # Deactivating a config also clears the spider it was assigned to.
                changes["spider_ip"] = ""
            db._update("scraper_setting", where="id=?", where_values=[config_id], **changes)
            db.commit()
            return redirect(url_for("configlist"))

        flash_errors(form)
        form.source.data = stored['source']
        form.url.data = stored['url']
        form.active.data = str(stored['active'])
        return render_template("addconfig.html", form=form)
    except StopIteration:
        # The select yielded nothing: there is no row with this id.
        return redirect(url_for("configlist"))
Exemple #6
0
def getconfig():
    """Hand the calling spider one config per known source.

    The caller is identified by the first ``X-Forwarded-For`` header value
    when present, otherwise by ``request.remote_addr``. For each source in
    ``craigslist``, ``ebay``, ``kijiji``, ``facebook``:

    1. an active config (``active = 1``) already bound to this IP is reused;
    2. otherwise the first inactive config (``active = 0``) of that source
       is claimed: it is set to ``active = 1`` with ``spider_ip`` set to the
       caller, and the change is committed;
    3. otherwise an empty entry (``url=""``, ``id=0``) is returned.

    Returns:
        ``(jsonify(list_of_entries), 200)`` where each entry has the keys
        ``url``, ``source`` and ``id``.
    """
    forwarded = request.headers.getlist("X-Forwarded-For")
    ip = forwarded[0] if forwarded else request.remote_addr

    sources = ["craigslist", "ebay", "kijiji", "facebook"]
    db = DB()
    data = []
    for source in sources:
        ret = _first_or_none(
            db._select2dic("scraper_setting", where="source = ? and spider_ip = ? and active = 1",
                           where_values=[source, ip]))
        if ret is None:
            # Nothing assigned yet: try to claim an idle config for this source.
            ret = _first_or_none(
                db._select2dic("scraper_setting", where="source = ? and active = 0", where_values=[source]))
            if ret is not None:
                db._update("scraper_setting", where="id = ?", where_values=[ret['id']], active=1, spider_ip=ip)
                db.commit()

        data.append({
            "url": ret['url'] if ret is not None else "",
            "source": source,
            "id": ret['id'] if ret is not None else 0,
        })
    return jsonify(data), 200


def _first_or_none(rows):
    """Return the first item yielded by *rows*, or ``None`` if it is empty.

    Only ``StopIteration`` is translated; any other error (for example a
    database failure) propagates instead of being mistaken for "no row".
    """
    try:
        return six.next(rows)
    except StopIteration:
        return None
Exemple #7
0
def archive_list():
    """Render ``archive_list.html`` with ``scraper_archive`` rows, newest first.

    An optional ``keyword`` query argument restricts the listing to rows
    with that keyword. Each row's ``created`` value is replaced by
    ``pretty_date(int(created))`` before rendering.
    """
    keyword = request.args.get("keyword", "")
    db = DB()
    if keyword:
        # ``where_values`` needs a placeholder to bind to; without the
        # ``where`` clause the keyword filter was never applied.
        session_list = db._select2dic("scraper_archive",
                                      where="keyword = ?",
                                      where_values=[keyword],
                                      order="created desc")
    else:
        session_list = db._select2dic("scraper_archive", order="created desc")
    data = []
    for each in session_list:
        each['created'] = pretty_date(int(each['created']))
        data.append(each)
    return render_template("archive_list.html", session_list=data, keyword=keyword)
Exemple #8
0
def deleteconfig():
    """Delete the ``scraper_setting`` row named by the ``id`` query argument.

    The row is deleted (and the change committed) only when it exists.
    Every outcome ends with a redirect to ``configlist``.
    """
    config_id = request.args.get("id", type=int)
    if config_id:
        db = DB()
        try:
            # Raises StopIteration when no such row exists, skipping the delete.
            six.next(db._select2dic("scraper_setting", where="id = ?", where_values=[config_id]))
            db._delete("scraper_setting", where="id = ?", where_values=[config_id])
            db.commit()
        except StopIteration:
            pass
    return redirect(url_for("configlist"))
Exemple #9
0
def save():
    """Flag the ``scraper_craigslist`` row given by ``id`` as saved.

    Returns a JSON body with ``code``/``message``/``data``:
    500 "missing id" when ``id`` is absent or zero, 404 "not found" when no
    such row exists, 200 "success" once ``is_save`` has been set to 1 and
    committed.
    """
    def reply(code, message):
        return jsonify({"code": code, "message": message, "data": None})

    row_id = request.args.get("id", type=int)
    if not row_id:
        return reply(500, "missing id")

    db = DB()
    try:
        six.next(db._select2dic("scraper_craigslist", where="id = ?", where_values=[row_id]))
    except StopIteration:
        return reply(404, "not found")

    db._update("scraper_craigslist", where="id = ?", where_values=[row_id], is_save=1)
    db.commit()
    return reply(200, "success")
Exemple #10
0
def getdata():
    """Return the newest 100 live listings rendered as HTML table rows.

    "Live" means ``is_delete = 0 and is_archive = 0``. With a ``keyword``
    query argument only rows of that keyword are returned; without it, every
    row whose keyword is not NULL. Each row's ``created`` value is passed
    through ``pretty_date(int(created))`` before rendering.

    Every interpolated value is HTML-escaped (including quotes) so that
    listing text cannot inject markup or break out of an attribute.

    Returns:
        The ``jsonify`` result for ``{"html": <all rows concatenated>}``.
    """
    # Function-scope import so the block does not rely on a file-level one.
    from xml.sax.saxutils import escape

    def cell(value):
        # Format exactly like "%s" did (so None/ints keep their old text),
        # then escape &, <, > and both quote characters.
        return escape("%s" % (value,), {'"': "&quot;", "'": "&#39;"})

    row_template = '''
        <tr>
            <td>%s</td>
            <td>%s</td>
            <td><a id="outurl" href="%s" target="_blank">%s</a></td>
            <td>%s</td>
            <td><input id="comments" name="comments" size="30" type="text" data-id="%s" value="%s"></td>
            <td>%s</td>
            <td>
                <a href="#" id="delete" data-id="%s">delete</a>
                <a href="#" id="archive" data-id="%s">archive</a>
                <a href="#" id="save" data-id="%s">save</a>
            </td>
        </tr>
        '''

    keyword = request.args.get("keyword")
    db = DB()
    if keyword:
        session_list = db._select2dic("scraper_craigslist", where="keyword = ? and is_delete = 0 and is_archive = 0",
                                      where_values=[keyword],
                                      order="created desc", limit=100, offset=0)
    else:
        session_list = db._select2dic("scraper_craigslist",
                                      where="keyword is not null and is_delete = 0 and is_archive = 0",
                                      order="created desc", limit=100, offset=0)

    html = []
    for each in session_list:
        each['created'] = pretty_date(int(each['created']))
        row_id = cell(each['id'])
        # NOTE(review): escaping stops markup injection, but the href still
        # accepts any URL scheme stored in the row - confirm that is acceptable.
        html.append(row_template % (
            cell(each['keyword']), cell(each['source']), cell(each['url']), cell(each['title']),
            cell(each['location']), row_id,
            cell(each['comments'] if each['comments'] else ""),
            cell(each['created']), row_id, row_id, row_id))
    return jsonify({"html": ''.join(html)})
Exemple #11
0
def addcomment():
    """Store a comment on the ``scraper_craigslist`` row given in the form.

    Reads ``id`` and ``comment`` from ``request.form``. Returns a JSON body
    with ``code``/``message``/``data``: 500 "missing id" when ``id`` is
    empty, 404 "not found" when no such row exists, 200 "success" once the
    ``comments`` column has been updated and committed.
    """
    def reply(code, message):
        return jsonify({"code": code, "message": message, "data": None})

    row_id = request.form["id"]
    comment = request.form["comment"]
    if not row_id:
        return reply(500, "missing id")

    db = DB()
    try:
        six.next(db._select2dic("scraper_craigslist", where="id = ?", where_values=[row_id]))
    except StopIteration:
        return reply(404, "not found")

    db._update("scraper_craigslist", where="id = ?", where_values=[row_id], comments=comment)
    db.commit()
    return reply(200, "success")
Exemple #12
0
def spider_status():
    """Record the status a spider reports for the configs bound to its IP.

    Reads ``id`` and ``status`` from ``request.form``. The caller's IP is the
    first ``X-Forwarded-For`` header value when present, otherwise
    ``request.remote_addr``. The ``id`` is only used to check that such a
    ``scraper_setting`` row exists; the update itself targets every row whose
    ``spider_ip`` equals the caller's IP.

    When the status is ``"Sleeping"`` the current Unix time is also stored
    in ``last_full_scan_time``.

    Returns a JSON body with ``code``/``message``/``data``: 500 "missing
    params", 404 "not found", or 200 "success".
    """
    id = request.form["id"]
    status = request.form["status"]

    forwarded = request.headers.getlist("X-Forwarded-For")
    ip = forwarded[0] if forwarded else request.remote_addr

    if not (id and status):
        return jsonify({"code": 500, "message": "missing params", "data": None})

    db = DB()
    try:
        six.next(
            db._select2dic("scraper_setting", where="id = ?", where_values=[id]))
    except StopIteration:
        return jsonify({"code": 404, "message": "not found", "data": None})

    changes = {"spider_status": status}
    if status == "Sleeping":
        # time.time() is already seconds since the epoch. The previous
        # time.mktime(time.gmtime()) interpreted the UTC struct as local
        # time, skewing the stored value by the server's UTC offset.
        changes["last_full_scan_time"] = int(time.time())
    db._update("scraper_setting", where="spider_ip = ?", where_values=[ip], **changes)
    db.commit()
    return jsonify({"code": 200, "message": "success", "data": None})
Exemple #13
0
def statusswitch():
    """Toggle the ``active`` flag of one ``scraper_setting`` row.

    ``id`` and ``active`` (the row's *current* flag) come from the query
    string. The new flag is 0 when ``active`` is 1 and 1 otherwise; switching
    to 0 also blanks ``spider_ip``. After committing, the refreshed row is
    rendered as table cells.

    Returns a JSON body with ``code``/``message``/``data``: 500 "missing id"
    when either argument is missing, 404 "not found" when the row does not
    exist, or 200 "success" with the rendered cells as ``data``.
    """
    cells_template = """
            <td>%s</td>
            <td>%s<br><a id="switch" href="#" data-id="%s" data-active="%s">Switch</a></td>
            <td>%s</td>
            <td>
                <a href="/deleteconfig?id=%s">delete</a>
                <a href="/editconfig?id=%s">edit</a>
            </td>
            <td>%s</td>
            """

    config_id = request.args.get("id", type=int)
    current = request.args.get("active", type=int)
    if not config_id or current is None:
        return jsonify({"code": 500, "message": "missing id", "data": None})

    db = DB()
    try:
        toggled = 0 if current == 1 else 1
        row = six.next(db._select2dic("scraper_setting", where="id = ?", where_values=[config_id]))

        if toggled == 0:
            db._update("scraper_setting", where="id=?", where_values=[config_id], active=toggled, spider_ip="")
            row['spider_ip'] = ""
        else:
            db._update("scraper_setting", where="id = ?", where_values=[config_id], active=toggled)
        row['active'] = toggled
        db.commit()
        row['active_status'] = "inactive" if toggled == 0 else "active"

        rendered = cells_template % (
            row['source'], row['active_status'], row['id'], row['active'], row['spider_ip'],
            row['id'], row['id'], row['url'])
        return jsonify({"code": 200, "message": "success", "data": rendered})
    except StopIteration:
        return jsonify({"code": 404, "message": "not found", "data": None})
Exemple #14
0
def adddata():
    """Insert a scraped listing into ``scraper_craigslist`` unless it exists.

    Reads ``outid``, ``url``, ``title``, ``location``, ``thumbnail``,
    ``keyword`` and ``created`` from ``request.form``; ``source`` defaults to
    ``'craigslist'``. A listing counts as existing when a row with the same
    ``outid`` and ``source`` is found.

    Returns a JSON body with ``code``/``message``/``data``: 500 "missing
    params" when a required value is empty, 303 "data exists" (with the
    ``url`` and ``outid``) for a duplicate, or 200 "success" after the insert
    has been committed.
    """
    fields = request.form
    outid = fields["outid"]
    url = fields["url"]
    title = fields["title"]
    source = fields.get('source', 'craigslist')
    location = fields["location"]
    thumbnail = fields["thumbnail"]
    keyword = fields["keyword"]
    created = fields["created"]

    if not (outid and url and title and keyword and created and source):
        return jsonify({"code": 500, "message": "missing params", "data": None})

    db = DB()
    try:
        six.next(
            db._select2dic("scraper_craigslist", where="outid = ? and source = ?", where_values=[outid, source]))
    except StopIteration:
        # No row with this outid/source yet: store it with all flags cleared.
        db._insert("scraper_craigslist",
                   outid=outid, url=url, title=title, location=location, thumbnail=thumbnail,
                   keyword=keyword, created=created, is_delete=0, is_archive=0, is_save=0,
                   source=source)
        db.commit()
        return jsonify({"code": 200, "message": "success", "data": None})
    return jsonify({"code": 303, "message": "data exists", "data": {'url': url, 'outid': outid}})
Exemple #15
0
def addconfig():
    """Create a new ``scraper_setting`` row from ``AddConfigForm``.

    * Form not submitted or invalid: flash its errors and render
      ``addconfig.html``.
    * A row with the same ``url`` and ``source`` already exists: add
      "url exists" to the url field errors, flash and re-render.
    * Otherwise: insert the row with an empty ``spider_ip``, commit and
      redirect to ``configlist``.
    """
    form = AddConfigForm()
    # NOTE(review): this is set before validation, so the ``active`` value
    # read below is presumably always "0" - confirm that is intended.
    form.active.data = "0"

    if not form.validate_on_submit():
        flash_errors(form)
        return render_template("addconfig.html", form=form)

    url = form.url.data
    active = form.active.data
    source = form.source.data
    db = DB()
    try:
        six.next(db._select2dic("scraper_setting", where="url = ? and source = ?",
                                where_values=[url, source]))
    except StopIteration:
        db._insert("scraper_setting", spider_ip="", url=url, active=active, source=source)
        db.commit()
        return redirect(url_for("configlist"))

    form.url.errors.append("url exists")
    flash_errors(form)
    return render_template("addconfig.html", form=form)
Exemple #16
0
def index():
    """Render ``list2.html`` with one page of live listings, newest first.

    Query arguments:
        keyword: optional; restricts the listing to that keyword. Without
            it, every row whose keyword is not NULL is listed.
        page: 1-based page number (default 1). Values below 1 are treated
            as 1 so the computed OFFSET can never be negative.

    "Live" means ``is_delete = 0 and is_archive = 0``. Pages hold 100 rows.
    Each row's ``created`` value is replaced by ``pretty_date(int(created))``.
    """
    per_page = 100
    keyword = request.args.get("keyword", "")
    page = max(request.args.get("page", 1, type=int), 1)
    offset = (page - 1) * per_page
    db = DB()

    if keyword:
        where = "keyword = ? and is_delete = 0 and is_archive = 0"
    else:
        where = "keyword is not null and is_delete = 0 and is_archive = 0"
    count_sql = "select count(1) from scraper_craigslist where " + where

    # Bind values are only passed when the clause actually has a placeholder,
    # mirroring how the two queries were issued before.
    select_args = {"where": where, "order": "created desc", "limit": per_page, "offset": offset}
    if keyword:
        rr = db._execute(count_sql, values=[keyword])
        select_args["where_values"] = [keyword]
    else:
        rr = db._execute(count_sql)

    # The count result is consumed before the row query is issued, as before.
    total = 0
    for row in rr:
        total = row[0]

    session_list = db._select2dic("scraper_craigslist", **select_args)
    pagination = get_pagination(page=page,
                                per_page=per_page,
                                total=total,
                                record_name='users',
                                format_total=True,
                                format_number=True,
                                )
    data = []
    for each in session_list:
        each['created'] = pretty_date(int(each['created']))
        data.append(each)
    return render_template("list2.html", session_list=data, keyword=keyword, pagination=pagination, page=page)
Exemple #17
0
def main():
    """Archive saved listings, then purge old rows from ``scraper_craigslist``.

    Every row with ``is_save = 1`` whose ``outid`` is not yet present in
    ``scraper_archive`` is copied there (without its ``id``, ``is_delete``,
    ``is_archive`` and ``is_save`` columns), committing after each insert.
    Afterwards all rows with ``created <= get_day_time()`` are deleted and
    the deletion is committed.
    """
    db = DB()
    for saved in db._select2dic("scraper_craigslist", where="is_save = 1"):
        try:
            six.next(
                db._select2dic("scraper_archive",
                               where="outid = ?",
                               where_values=[saved['outid']]))
        except StopIteration:
            # Not archived yet: drop the columns that are not copied over.
            for column in ("id", "is_delete", "is_archive", "is_save"):
                del saved[column]
            db._insert("scraper_archive", **saved)
            db.commit()

    cutoff = get_day_time()
    db._delete("scraper_craigslist", where="created <= ?", where_values=[cutoff])
    db.commit()
Exemple #18
0
def keyword_filter():
    """Show and store the single ``keyword_filter`` row.

    The first row of ``keyword_filter`` (if any) is loaded. On a valid form
    submission its ``keywords`` column is updated and the page re-rendered,
    or, when no row exists yet, a row is inserted and the browser redirected
    back to this view. Otherwise the form errors are flashed and the form is
    pre-filled from the stored row before rendering ``keyword_filter.html``.
    """
    form = AddKeywordsFilterForm()
    db = DB()
    try:
        ret = six.next(db._select2dic("keyword_filter"))
    except StopIteration:
        # Only "no row yet" is expected here; other errors (e.g. database
        # failures) are no longer silently swallowed.
        ret = None

    if form.validate_on_submit():
        keywords = form.keywords.data
        if ret:
            db._update("keyword_filter", where="id = ?", where_values=[ret['id']], keywords=keywords)
            db.commit()
            return render_template("keyword_filter.html", form=form)
        db._insert("keyword_filter", **{"keywords": keywords})
        db.commit()
        return redirect(url_for("keyword_filter"))

    flash_errors(form)
    if ret:
        form.keywords.data = ret['keywords']
    return render_template("keyword_filter.html", form=form)
Exemple #19
0
def archive():
    """Move one ``scraper_craigslist`` row into ``scraper_archive``.

    The row is selected by the ``id`` query argument and copied without its
    ``id``, ``is_delete``, ``is_archive`` and ``is_save`` columns.

    The archive insert now runs *before* the delete and both are committed
    together, so a failing insert can no longer leave the listing deleted
    but not archived.
    NOTE(review): whether the two statements share one transaction depends
    on ``DB``, which is not visible here - confirm.

    Returns a JSON body with ``code``/``message``/``data``: 500 "missing id",
    404 "not found", or 200 "success".
    """
    id = request.args.get("id", type=int)
    if not id:
        return jsonify({"code": 500, "message": "missing id", "data": None})

    db = DB()
    try:
        ret = six.next(db._select2dic("scraper_craigslist", where="id = ?", where_values=[id]))
    except StopIteration:
        return jsonify({"code": 404, "message": "not found", "data": None})

    # These columns are not carried over to the archive row.
    for column in ("id", "is_delete", "is_archive", "is_save"):
        del ret[column]

    db._insert("scraper_archive", **ret)
    db._delete("scraper_craigslist", where="id = ?", where_values=[id])
    db.commit()
    return jsonify({"code": 200, "message": "success", "data": None})