Example #1
0
def convert_site_colorspace():
    """Create a GREYSCALE copy of every RGB screenshot that lacks one.

    For each Screenshot of type RGB, checks whether any screenshot for the
    same site is already GREYSCALE; if not, converts the image file via
    ``to_greyscale`` and inserts a new GREYSCALE Screenshot row. All
    progress is mirrored to a fresh timestamped log file under
    ``STORAGE_LOGS_PATH``.
    """
    filename = f"convert_{file_safe_timestamp()}.log"
    # BUG FIX: the filename was computed but never interpolated into the
    # path, so every run tried to create the same literal file and mode
    # "x" made the second run crash. Also manage the handle with `with`
    # so it is closed even on error.
    with open(f"{STORAGE_LOGS_PATH}/{filename}", "x") as f:
        f.write(str(datetime.datetime.now()) + "\n\n")

        session = Session()
        screenshots = session.query(Screenshot).filter_by(type=ScreenshotEnum.RGB)
        for screenshot in screenshots:
            # All screenshots belonging to the same site, any colorspace.
            sc_check = session.query(Screenshot).filter_by(
                site_id=screenshot.site_id)
            already_converted = False
            for sc in sc_check:
                if sc.type == ScreenshotEnum.GREYSCALE:
                    print('Found greyscale version of screenshot.')
                    f.write(
                        f"Found greyscale version of screenshot. Skipping (id={sc.id})\n\n"
                    )

                    already_converted = True
            # Guard clause instead of the original `if flag: pass / else:`.
            if not already_converted:
                site_name = session.query(Site).get(screenshot.site_id).name

                print(
                    f"Converting screenshot of site {site_name} from RGB to GREYSCALE"
                )
                f.write(
                    f"Converting screenshot of site {site_name} from RGB to GREYSCALE \n"
                )

                # Convert the image on disk, then record the new variant.
                path = to_greyscale(screenshot.path, site_name)
                greyscale_screenshot = Screenshot(site_id=screenshot.site_id,
                                                  type=ScreenshotEnum.GREYSCALE,
                                                  path=path)
                session.add(greyscale_screenshot)
                session.commit()

                print(f"Finished conversion of {site_name} from RGB to GREYSCALE")
                f.write(
                    f"Finished conversion of {site_name} from RGB to GREYSCALE \n\n"
                )

        session.close()
Example #2
0
def parse_collected_data():
    """Parse every unparsed raw Response row into ParsedResponse rows.

    Iterates Response rows with ``parsed=False``; each raw response is
    decomposed by ``parse_response`` into per-site records (rank, reach and
    page-view metrics), which are persisted as ParsedResponse rows. The
    source Response is then flagged ``parsed=True``. Progress is mirrored
    to a fresh timestamped log file under ``STORAGE_LOGS_PATH``.
    """
    session = Session()
    data = session.query(Response).filter_by(parsed=False)

    filename = f"parsed_response_{file_safe_timestamp()}.log"
    # BUG FIX: `filename` was never interpolated into the path — the log
    # was opened at the literal ".../（unknown)" location, and mode "x"
    # made the second run crash. `with` also guarantees the handle is
    # closed even if parsing raises mid-loop.
    with open(f"{STORAGE_LOGS_PATH}/{filename}", "x") as f:
        f.write(str(datetime.datetime.now()) + "\n\n")

        for site in data:
            response_id = site.id
            print(f"Beginning response number {response_id}")
            f.write(f"Beginning response number {response_id} \n")

            for response in parse_response(site.response):
                url = response['DataUrl']
                print(f"Beginning site {url}")
                f.write(f"Beginning site {url}: ")
                # Pull the metrics of interest out of the nested payload.
                rank = response['Global']['Rank']
                reach_per_million = response['Country']['Reach']['PerMillion']
                page_views_per_million = response['Country']['PageViews'][
                    'PerMillion']
                page_views_per_user = response['Country']['PageViews']['PerUser']
                f.write(
                    f"Rank: {rank}, Reach/Million: {reach_per_million}, Page Views/Million: {page_views_per_million}, Page Views/User: {page_views_per_user} \n"
                )
                parsed_response = ParsedResponse(
                    response_id=response_id,
                    url=url,
                    rank=rank,
                    reach_per_million=reach_per_million,
                    page_views_per_million=page_views_per_million,
                    page_views_per_user=page_views_per_user)
                session.add(parsed_response)
                session.commit()

            # Mark the source row as consumed so it is skipped next run.
            session.query(Response).get(response_id).parsed = True
            print(f"Finished parsing response number {response_id}")
            f.write(f"Finished parsing response number {response_id} \n\n")
            session.commit()

        session.close()
Example #3
0
def convert_parsed_to_site():
    """Create a Site row for every ParsedResponse.

    The site name is the response URL with dots replaced by underscores
    (e.g. ``example.com`` -> ``example_com``); the host is the raw URL.
    Progress is mirrored to a fresh timestamped log file under
    ``STORAGE_LOGS_PATH``.
    """
    session = Session()
    data = session.query(ParsedResponse).all()

    filename = f"parsed_to_sites_{file_safe_timestamp()}.log"
    # BUG FIX: `filename` was never interpolated into the path (the log
    # was opened at a literal placeholder path, crashing on the second run
    # because of mode "x"), and the handle was never closed. `with`
    # fixes both.
    with open(f"{STORAGE_LOGS_PATH}/{filename}", "x") as f:
        f.write(str(datetime.datetime.now()) + "\n\n")

        for response in data:
            print(f"Beginning response {response.id}")
            f.write(f"Beginning response {response.id}: \n")
            # "example.com" -> name "example_com", host "example.com".
            split_url = response.url.split(".")
            site = Site(name='_'.join(split_url), host=response.url)
            print(f"Parsing site {'_'.join(split_url)} at host {response.url}")
            f.write(
                f"Parsing site {'_'.join(split_url)} at host {response.url} \n\n")
            session.add(site)
            session.commit()
            print(f"Finished parsing response number {response.id}")
            f.write(f"Finished parsing response number {response.id} \n\n")

        session.close()
Example #4
0
def collect_aws_data():
    """Fetch site data from the AWS-hosted API in batches and store it.

    Resumes from the offset persisted in ``QUERY_START_PATH``. Each
    iteration requests ``interval`` sites starting at that offset, logs
    and stores the raw JSON response as a Response row, then advances the
    persisted offset. Stops once the offset reaches ``limit``.
    """
    session = Session()
    limit = 1001       # exclusive upper bound on the site count to query
    lower = 1          # API site indices are 1-based
    interval = 100     # batch size per request

    # The range only caps the number of iterations; the actual offset is
    # re-read from disk each time so interrupted runs resume correctly.
    for _ in range(lower, limit, interval):
        # BUG FIX: the original `open(...).readlines()[0]` leaked a file
        # handle on every iteration; use a context manager instead.
        with open(QUERY_START_PATH, 'r') as start_file:
            query_start = int(start_file.readline())
        print(f"Querying sites at count {query_start}")
        if query_start >= limit:
            print("Exceeded limit")
            break
        aws_request_url = query_api_url(start=query_start, count=interval)
        aws_res = make_api_request(aws_request_url)
        log_query_response(aws_res.json())
        response = Response(query=aws_request_url, response=aws_res.json())
        session.add(response)
        session.commit()
        # Persist the next offset only after the batch is safely stored.
        with open(QUERY_START_PATH, 'w') as start_file:
            start_file.write(str(query_start + interval))

    session.close()