Example #1
def scraperinfo_handler(request):
    result = []

    apikey = request.GET.get('apikey', None)

    quietfields = request.GET.get('quietfields', "").split("|")
    history_start_date = convert_date(
        request.GET.get('history_start_date', None))

    try:
        rev = int(request.GET.get('version', ''))
    except ValueError:
        rev = None

    for short_name in request.GET.get('name', "").split():
        scraper, err = getscraperorresponse(short_name)

        if err:
            result = json.dumps({'error': err, "short_name": short_name})
            if request.GET.get("callback"):
                result = "%s(%s)" % (request.GET.get("callback"), result)
            return HttpResponse(result)

        # If this scraper is private, check that the apikey authorizes access
        if hasattr(scraper, "privacy_status") and scraper.privacy_status == 'private':
            if not scraper.api_actionauthorized(apikey):
                scraper = u'Invalid API Key'

        if isinstance(scraper, basestring):
            result.append({'error': scraper, "short_name": short_name})
        else:
            result.append(
                scraperinfo(scraper, history_start_date, quietfields, rev))

    u = None
    if request.user.is_authenticated():
        u = request.user
    APIMetric.record("getinfo",
                     key_data=request.GET.get('name', ""),
                     user=u,
                     code_object=None)

    res = json.dumps(result, indent=4)
    callback = request.GET.get("callback")
    if callback:
        res = "%s(%s)" % (callback, res)
    response = HttpResponse(res, mimetype='application/json; charset=utf-8')
    response['Content-Disposition'] = 'attachment; filename=scraperinfo.json'
    return response
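
Every handler on this page ends with the same JSONP-wrapping and attachment boilerplate. A minimal sketch of a helper that could consolidate it; the name jsonp_response is hypothetical and not part of the original codebase, and mimetype= assumes the same pre-1.7 Django these examples target:

import json

from django.http import HttpResponse


def jsonp_response(request, payload, filename=None):
    # Hypothetical helper: serialize the payload, wrap it in the optional
    # JSONP callback, and set the same headers the handlers set by hand.
    res = json.dumps(payload, indent=4)
    callback = request.GET.get("callback")
    if callback:
        res = "%s(%s)" % (callback, res)
    response = HttpResponse(res, mimetype='application/json; charset=utf-8')
    if filename:
        response['Content-Disposition'] = 'attachment; filename=%s' % filename
    return response
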
Example #3
def userinfo_handler(request):
    username = request.GET.get('username', "")
    apikey = request.GET.get('apikey', "")
    users = User.objects.filter(username=username)
    result = []
    for user in users:  # list of users is normally 1
        info = {
            "username": user.username,
            "profilename": user.get_profile().name
        }
        info["datejoined"] = user.date_joined.isoformat()
        info['coderoles'] = {}
        for ucrole in user.usercoderole_set.exclude(
                code__privacy_status="deleted"):
            if ucrole.code.privacy_status != "private":
                if ucrole.role not in info['coderoles']:
                    info['coderoles'][ucrole.role] = []
                info['coderoles'][ucrole.role].append(ucrole.code.short_name)
            elif apikey:
                try:
                    api_user = UserProfile.objects.get(apikey=apikey).user
                    if api_user.usercoderole_set.filter(
                            code__short_name=ucrole.code.short_name):
                        if ucrole.role not in info['coderoles']:
                            info['coderoles'][ucrole.role] = []
                        info['coderoles'][ucrole.role].append(
                            ucrole.code.short_name)
                except UserProfile.DoesNotExist:
                    pass

        result.append(info)

    u = None
    if request.user.is_authenticated():
        u = request.user
    APIMetric.record("getuserinfo",
                     key_data=username,
                     user=u,
                     code_object=None)

    res = json.dumps(result, indent=4)
    callback = request.GET.get("callback")
    if callback:
        res = "%s(%s)" % (callback, res)
    response = HttpResponse(res, mimetype='application/json; charset=utf-8')
    response['Content-Disposition'] = 'attachment; filename=userinfo.json'
    return response
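
A hypothetical client-side sketch; the URL route and the username are assumptions, since these examples show only the view functions, not the urlconf:

from django.test import Client

client = Client()
resp = client.get('/api/1.0/getuserinfo', {'username': 'alice'})
# The handler responds with a JSON list, one entry per matching user, e.g.:
# [{"username": "alice", "profilename": "Alice",
#   "datejoined": "2011-05-01T12:00:00",
#   "coderoles": {"owner": ["my_scraper"], "editor": ["another_scraper"]}}]
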
Example #5
def runevent_handler(request):
    apikey = request.GET.get("apikey", None)

    short_name = request.GET.get("name")
    scraper, err = getscraperorresponse(short_name)
    if err:
        result = json.dumps({"error": err, "short_name": short_name})
        if request.GET.get("callback"):
            result = "%s(%s)" % (request.GET.get("callback"), result)
        return HttpResponse(result)

    kd = scraper.short_name
    s = scraper

    # If this scraper is private, check that the apikey authorizes access
    if not scraper.api_actionauthorized(apikey):
        result = json.dumps({"error": "Invalid API Key", "short_name": short_name})
        if request.GET.get("callback"):
            result = "%s(%s)" % (request.GET.get("callback"), result)
        return HttpResponse(result)
    if scraper.privacy_status == "private":  # XXX not sure why we do this, do metrics not work with private? FAI
        kd, s = "", None

    u = None
    if request.user.is_authenticated():
        u = request.user
    APIMetric.record("runeventinfo", key_data=kd, user=u, code_object=s)

    runid = request.GET.get("runid", "-1")
    runevent = None
    if scraper.wiki_type != "view":
        # negative index counts back from the most recent run
        if runid[0] == "-":
            try:
                i = -int(runid) - 1
                runevents = scraper.scraper.scraperrunevent_set.all().order_by("-run_started")
                if i < len(runevents):
                    runevent = runevents[i]
            except ValueError:
                pass
        if not runevent:
            try:
                runevent = scraper.scraper.scraperrunevent_set.get(run_id=runid)
            except ScraperRunEvent.DoesNotExist:
                pass

    if not runevent:
        result = json.dumps({"error": "run_event not found", "short_name": short_name})
        if request.GET.get("callback"):
            result = "%s(%s)" % (request.GET.get("callback"), result)
        return HttpResponse(result)

    info = {
        "runid": runevent.run_id,
        "run_started": runevent.run_started.isoformat(),
        "records_produced": runevent.records_produced,
        "pages_scraped": runevent.pages_scraped,
    }
    if runevent.run_ended:
        info["run_ended"] = runevent.run_ended.isoformat()
    if runevent.exception_message:
        info["exception_message"] = runevent.exception_message

    info["output"] = runevent.output
    if runevent.first_url_scraped:
        info["first_url_scraped"] = runevent.first_url_scraped

    domainsscraped = []
    for domainscrape in runevent.domainscrape_set.all():
        domainsscraped.append(
            {"domain": domainscrape.domain, "bytes": domainscrape.bytes_scraped, "pages": domainscrape.pages_scraped}
        )
    if domainsscraped:
        info["domainsscraped"] = domainsscraped

    result = [info]  # a list with one element
    res = json.dumps(result, indent=4)
    callback = request.GET.get("callback")
    if callback:
        res = "%s(%s)" % (callback, res)
    response = HttpResponse(res, mimetype="application/json; charset=utf-8")
    response["Content-Disposition"] = "attachment; filename=runevent.json"
    return response
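
The runid parameter doubles as a negative index into the run history: "-1" selects the most recent run, "-2" the one before it, and so on. The arithmetic the handler uses, checked in isolation:

# Runs are ordered newest-first ("-run_started"), so the mapping is:
assert -int("-1") - 1 == 0   # most recent run
assert -int("-2") - 1 == 1   # the run before that
assert -int("-3") - 1 == 2
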
Example #6
def usersearch_handler(request):
    query = request.GET.get("searchquery")
    try:
        maxrows = int(request.GET.get("maxrows", ""))
    except ValueError:
        maxrows = 5

    u = None
    if request.user.is_authenticated():
        u = request.user
    APIMetric.record("usersearch", key_data=query, user=u, code_object=None)

    # usernames we don't want to be returned in the search
    nolist = request.GET.get("nolist", "").split()

    srequestinguser = request.GET.get("requestinguser", "")
    lrequestinguser = User.objects.filter(username=srequestinguser)
    if lrequestinguser:
        requestinguser = lrequestinguser[0]
    else:
        requestinguser = None

    if query:
        users = User.objects.filter(username__icontains=query)
        userprofiles = User.objects.filter(userprofile__name__icontains=query)
        users_all = users | userprofiles
    else:
        users_all = User.objects.all()
    users_all = users_all.order_by("username")

    # if there is a requestinguser, then rank by overlaps and sort
    # (inefficient, but I got no other ideas right now)
    # (could be doing something with scraper.userrolemap())
    if requestinguser:
        requestuserscraperset = set(
            [usercoderole.code.short_name for usercoderole in requestinguser.usercoderole_set.all()]
        )
        userlist = list(users_all)
        for user in userlist:
            user.colleaguescore = len(
                requestuserscraperset.intersection(
                    [usercoderole.code.short_name for usercoderole in user.usercoderole_set.all()]
                )
            )
        userlist.sort(key=lambda user: user.colleaguescore, reverse=True)
        # for user in userlist:
        #    print (user, user.colleaguescore)
    else:
        userlist = users_all[: (maxrows + len(nolist))]

    result = []
    for user in userlist:
        if user.username not in nolist:
            res = {
                "username": user.username,
                "profilename": user.get_profile().name,
                "date_joined": user.date_joined.isoformat(),
            }
            result.append(res)
        if len(result) > maxrows:  # note: '>' lets through maxrows + 1 results before breaking
            break

    res = json.dumps(result, indent=4)
    callback = request.GET.get("callback")
    if callback:
        res = "%s(%s)" % (callback, res)

    response = HttpResponse(res, mimetype="application/json; charset=utf-8")
    response["Content-Disposition"] = "attachment; filename=search.json"
    return response
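
The ranking above scores each candidate by how many scrapers they share with the requesting user. The same scoring step in isolation, with made-up sets standing in for the usercoderole_set querysets:

requesting = set(["scraper_a", "scraper_b", "scraper_c"])
candidates = {
    "alice": set(["scraper_a", "scraper_b"]),  # shares 2
    "bob": set(["scraper_c"]),                 # shares 1
    "carol": set(["scraper_x"]),               # shares 0
}
ranked = sorted(candidates,
                key=lambda name: len(requesting & candidates[name]),
                reverse=True)
assert ranked == ["alice", "bob", "carol"]
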
Example #7
def scraper_search_handler(request):
    apikey = request.GET.get("apikey", None)

    query = request.GET.get("query")
    if not query:
        query = request.GET.get("searchquery")
    try:
        maxrows = int(request.GET.get("maxrows", ""))
    except ValueError:
        maxrows = 5
    result = []  # list of dicts

    boverduescraperrequest = False
    if query == "*OVERDUE*":
        # We should check apikey against our shared secret. If it matches then it should
        # be allowed to continue.

        if request.META.get("HTTP_X_REAL_IP", "Not specified") in settings.INTERNAL_IPS:
            boverduescraperrequest = True
        if settings.INTERNAL_IPS == ["IGNORETHIS_IPS_CONSTRAINT"] or "127.0.0.1" in settings.INTERNAL_IPS:
            boverduescraperrequest = True
    else:
        u = None
        if request.user.is_authenticated():
            u = request.user
        APIMetric.record("scrapersearch", key_data=query, user=u, code_object=None)

    # TODO: If the user has specified an API key then we should pass it into
    # the search query and refine the resultset to show only valid scrapers
    if boverduescraperrequest:
        scrapers_all = scrapers_overdue()
    else:
        scrapers_all = scraper_search_query_unordered(user=None, query=query, apikey=apikey)

    # scrapers we don't want to be returned in the search
    nolist = request.GET.get("nolist", "").split()
    quietfields = request.GET.get("quietfields", "").split("|")
    # offset = request.GET.get('offset', 0)

    srequestinguser = request.GET.get("requestinguser", "")
    lrequestinguser = User.objects.filter(username=srequestinguser)
    if lrequestinguser:
        requestinguser = lrequestinguser[0]
    else:
        requestinguser = None

    # convert the query into an ordered list
    if boverduescraperrequest:
        scraperlist = scrapers_all

        # probably a way of sorting by some ranking on these ownership fields right in the database
    elif requestinguser:
        scraperlist = list(scrapers_all.distinct())
        for scraper in scraperlist:
            usercoderoles = UserCodeRole.objects.filter(code=scraper, user=requestinguser)
            if usercoderoles:
                if usercoderoles[0].role == "owner":
                    scraper.colleaguescore = (3, scraper.short_name)  # created_at
                elif usercoderoles[0].role == "editor":
                    scraper.colleaguescore = (2, scraper.short_name)  # created_at
                else:
                    scraper.colleaguescore = (1, scraper.short_name)  # created_at
            else:
                scraper.colleaguescore = (0, scraper.short_name)  # created_at
        scraperlist.sort(key=lambda user: user.colleaguescore, reverse=True)
    else:
        scrapers_all = scrapers_all.order_by("-created_at")
        scraperlist = scrapers_all.distinct()[: (maxrows + len(nolist))]

    for scraper in scraperlist:
        if scraper.short_name in nolist:
            continue
        res = {"short_name": scraper.short_name}
        res["title"] = scraper.title
        owners = scraper.userrolemap()["owner"]
        if owners:
            owner = owners[0]
            try:
                profile = owner.get_profile()
                ownername = profile.name
                if boverduescraperrequest:
                    res["beta_user"] = profile.beta_user  # to enable certain scrapers to go through the lxc process
            except frontend.models.UserProfile.DoesNotExist:
                ownername = owner.username
            if not ownername:
                ownername = owner.username
            if ownername:
                res["title"] = "%s / %s" % (ownername, scraper.title)
        if "description" not in quietfields:
            res["description"] = scraper.description_safepart()
        res["created"] = scraper.created_at.isoformat()
        res["privacy_status"] = scraper.privacy_status
        res["language"] = scraper.language

        # extra data added to the overdue request kind so that twister has everything it needs to get on with it
        # and doesn't need to call back for further information
        if boverduescraperrequest:
            res["overdue_proportion"] = float(scraper.overdue_proportion)
            vcsstatus = scraper.get_vcs_status(-1)
            res["code"] = vcsstatus.get("code", "#Code not previously saved")
            res["rev"] = vcsstatus.get("prevcommit", {}).get("rev", -1)
            res["guid"] = scraper.guid
            res["attachables"] = [ascraper.short_name for ascraper in scraper.attachable_scraperdatabases()]
            res["envvars"] = scraper.description_envvars()

        result.append(res)
        if len(result) > maxrows:  # note: '>' lets through maxrows + 1 results before breaking
            break

    if request.GET.get("format") == "csv":
        fout = StringIO()
        writer = csv.writer(fout, dialect="excel")
        headers = ["short_name", "title", "description", "created", "privacy_status"]
        writer.writerow(headers)
        for r in result:
            writer.writerow([r[header] for header in headers])
        response = HttpResponse(fout.getvalue(), mimetype="text/csv")
        response["Content-Disposition"] = "attachment; filename=search.csv"
        return response

    res = json.dumps(result, indent=4)
    callback = request.GET.get("callback")
    if callback:
        res = "%s(%s)" % (callback, res)
    response = HttpResponse(res, mimetype="application/json; charset=utf-8")
    # response['Content-Disposition'] = 'attachment; filename=search.json'
    return response
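
Ranking here sorts on a (role_weight, short_name) tuple, so owners (3) outrank editors (2), other roles (1), and non-members (0). Because reverse=True applies to the whole tuple, the short_name tiebreak comes out in reverse alphabetical order. A standalone check with made-up names:

scores = [(3, "zebra"), (2, "apple"), (3, "apple"), (0, "misc")]
scores.sort(reverse=True)
assert scores == [(3, "zebra"), (3, "apple"), (2, "apple"), (0, "misc")]
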
Example #8
def sqlite_handler(request):
    short_name = request.GET.get("name")
    apikey = request.GET.get("apikey", None)

    scraper, err = getscraperorresponse(short_name)
    if err:
        result = json.dumps({"error": err, "short_name": short_name})
        if request.GET.get("callback"):
            result = "%s(%s)" % (request.GET.get("callback"), result)
        return HttpResponse(result)

    u, s, kd = None, None, ""
    if request.user.is_authenticated():
        u = request.user

    if scraper.privacy_status != "private":
        s = scraper  # XX why this only when not private? FAI
        kd = short_name
    else:
        # When private we MUST have an apikey and it should match
        if not scraper.api_actionauthorized(apikey):
            result = json.dumps({"error": "Invalid API Key", "short_name": short_name})
            if request.GET.get("callback"):
                result = "%s(%s)" % (request.GET.get("callback"), result)
            return HttpResponse(result)

    APIMetric.record("sqlite", key_data=kd, user=u, code_object=s)

    dataproxy = DataStore(request.GET.get("name"))
    lattachlist = request.GET.get("attach", "").split(";")
    attachlist = []
    for aattach in lattachlist:
        if aattach:
            aa = aattach.split(",")
            attachi = {"name": aa[0], "asname": (len(aa) == 2 and aa[1] or None)}
            attachlist.append(attachi)
            dataproxy.request(
                {
                    "maincommand": "sqlitecommand",
                    "command": "attach",
                    "name": attachi["name"],
                    "asname": attachi["asname"],
                }
            )

    sqlquery = request.GET.get("query", "")
    format = request.GET.get("format", "json")
    if format == "json":
        format = "jsondict"

    req = {"maincommand": "sqliteexecute", "sqlquery": sqlquery, "data": None, "attachlist": attachlist}
    if format == "csv":
        req["streamchunking"] = 1000

    # This is inlined from the dataproxy.request() function to allow for
    # receiveoneline to perform multiple readlines in this case.
    # (this is the stream-chunking thing.  the right interface is not yet
    # apparent)

    dataproxy.m_socket.sendall(json.dumps(req) + "\n")

    if format not in ["jsondict", "jsonlist", "csv", "htmltable", "rss2"]:
        dataproxy.close()
        return HttpResponse("Error: the format '%s' is not supported" % format)

    if format in ["csv", "htmltable"]:
        return out_csvhtml(dataproxy, scraper.short_name, format)
    if format == "rss2":
        return out_rss2(dataproxy, scraper)

    return out_json(dataproxy, request.GET.get("callback"), scraper.short_name, format)
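
The attach parameter packs several databases into one query argument: entries are separated by ";", and each entry is either a scraper name or a "name,alias" pair. The handler's parsing logic, run on a made-up value:

attach = "first_scraper;second_scraper,alias2"
attachlist = []
for aattach in attach.split(";"):
    if aattach:
        aa = aattach.split(",")
        attachlist.append({"name": aa[0],
                           "asname": (len(aa) == 2 and aa[1] or None)})
assert attachlist == [{"name": "first_scraper", "asname": None},
                      {"name": "second_scraper", "asname": "alias2"}]
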
Example #12
def sqlite_handler(request):
    short_name = request.GET.get('name')
    apikey = request.GET.get('apikey', None)

    scraper, err = getscraperorresponse(short_name)
    if err:
        result = json.dumps({'error': err, "short_name": short_name})
        if request.GET.get("callback"):
            result = "%s(%s)" % (request.GET.get("callback"), result)
        return HttpResponse(result)

    u, s, kd = None, None, ""
    if request.user.is_authenticated():
        u = request.user

    if scraper.privacy_status != "private":
        s = scraper  # XX why this only when not private? FAI
        kd = short_name
    else:
        # When private we MUST have an apikey and it should match
        if not scraper.api_actionauthorized(apikey):
            result = json.dumps({
                'error': "Invalid API Key",
                "short_name": short_name
            })
            if request.GET.get("callback"):
                result = "%s(%s)" % (request.GET.get("callback"), result)
            return HttpResponse(result)

    APIMetric.record("sqlite", key_data=kd, user=u, code_object=s)

    dataproxy = DataStore(request.GET.get('name'))
    lattachlist = request.GET.get('attach', '').split(";")
    attachlist = []
    for aattach in lattachlist:
        if aattach:
            aa = aattach.split(",")
            attachi = {
                "name": aa[0],
                "asname": (len(aa) == 2 and aa[1] or None)
            }
            attachlist.append(attachi)
            dataproxy.request({
                "maincommand": "sqlitecommand",
                "command": "attach",
                "name": attachi["name"],
                "asname": attachi["asname"]
            })

    sqlquery = request.GET.get('query', "")
    format = request.GET.get("format", "json")
    if format == "json":
        format = "jsondict"

    req = {
        "maincommand": "sqliteexecute",
        "sqlquery": sqlquery,
        "data": None,
        "attachlist": attachlist
    }
    if format == "csv":
        req["streamchunking"] = 1000

    # This is inlined from the dataproxy.request() function to allow for
    # receiveoneline to perform multiple readlines in this case.
    # (this is the stream-chunking thing.  the right interface is not yet
    # apparent)

    dataproxy.m_socket.sendall(json.dumps(req) + '\n')

    if format not in [
            "jsondict", "jsonlist", "csv", "htmltable", "rss2",
            "base64singleton", "htmltable_unescaped"
    ]:
        dataproxy.close()
        return HttpResponse("Error: the format '%s' is not supported" % format)

    if format in ["csv", 'htmltable', 'htmltable_unescaped']:
        return out_csvhtml(dataproxy, scraper.short_name, format)
    if format == "rss2":
        return out_rss2(dataproxy, scraper)
    if format == "base64singleton":
        return out_base64singleton(dataproxy,
                                   request.GET.get('mimetype', "text/plain"))

    return out_json(dataproxy, request.GET.get("callback"), scraper.short_name,
                    format)
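
A hypothetical client-side call; the endpoint path, host, scraper name, and table name are all assumptions, as this page shows only the view function:

import urllib  # Python 2, to match the examples above

params = urllib.urlencode({
    "name": "my_scraper",
    "query": "select * from swdata limit 10",
    "format": "csv",
    "apikey": "YOUR_API_KEY",  # required only when the scraper is private
})
url = "https://example.org/api/1.0/datastore/sqlite?" + params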