예제 #1
0
def plot3(dbname, condition):
    """Report, per domain, the share of image-bearing pages matching ``condition``.

    Original author's note: users of beacons - % pages with at least 1 beacon.

    A first query counts, for every domain, the distinct pages that have at
    least one row in ``images``.  For each domain a second query repeats that
    count restricted by ``condition``.  The ratio of the two, times 100, is
    printed and written as a ``domain;percentage`` line to
    ``plot3<dbname>.csv`` in the current directory.

    Args:
        dbname: Passed to ``SqlConnector`` and embedded in the CSV file name.
        condition: SQL boolean expression spliced verbatim into the WHERE
            clause of the per-domain query.  It must come from trusted code.
    """
    # The context manager closes the CSV even when the connector or a query
    # raises part-way through.
    with open(u'plot3' + dbname + '.csv', 'w') as logname:
        db = SqlConnector(dbname)

        totals = db.execute("SELECT domains.domain, count(distinct pages.id) \
                as pagescount FROM images INNER JOIN pages ON \
                pages.id = images.id_pages INNER JOIN domains ON \
                pages.id_domains = domains.id group by domains.domain order by pagescount DESC;"
                            )

        for item in totals:
            domain = item[0]
            pages_count = item[1]
            # NOTE(review): the domain is concatenated into the SQL text; a
            # value containing a quote would break the statement.  Switch to a
            # parameterized query if SqlConnector.execute supports one.
            matching = db.execute("SELECT count(distinct pages.id) \
                as pagescount FROM images INNER JOIN pages ON \
                pages.id = images.id_pages INNER JOIN domains ON \
                pages.id_domains = domains.id WHERE domains.domain = '" +
                                  domain + "' and " + condition + ";")
            matching_count = matching[0][0]
            percentage = matching_count / pages_count * 100
            print("{0} {1} {2} : {3}%\n".format(domain, pages_count,
                                                matching_count, percentage))
            logname.write("{0};{1}\n".format(domain, percentage))
예제 #2
0
def plot1(dbname, condition):
    """Write the number of images per image domain that match ``condition``.

    Original author's note: providers of beacons - count of beacons.

    Runs one grouped query over ``images`` joined with ``image_domains``,
    ordered by descending count.  Each result row is printed and written as a
    ``domain;count`` line to ``plot1<dbname>.csv`` in the current directory.

    Args:
        dbname: Passed to ``SqlConnector`` and embedded in the CSV file name.
        condition: SQL boolean expression spliced verbatim into the WHERE
            clause.  It must come from trusted code.
    """
    # The context manager closes the CSV even when the connector or the query
    # raises.
    with open(u'plot1' + dbname + '.csv', 'w') as logname:
        db = SqlConnector(dbname)

        output = db.execute(
            "SELECT image_domains.domain, count(images.id) \
                as imgcount FROM images INNER JOIN image_domains ON \
                image_domains.id = images.id_image_domains WHERE " +
            condition + " group by image_domains.domain order by imgcount DESC;")

        for item in output:
            logname.write("{0};{1}\n".format(item[0], item[1]))
            print(item)
예제 #3
0
def extract_twitter_pixels(dbname):
    """Dump every at-most-1x1 image served from twitter.com to a text file.

    Selects images whose width and height are both <= 1 and whose image
    domain is ``'twitter.com'``.  For each one it writes the page URL, the
    dimensions and the image URL to ``twitter_pixels.txt`` in the current
    directory.

    Args:
        dbname: Passed to ``SqlConnector`` to choose the database.
    """
    # The context manager closes the report even when the connector or the
    # query raises.
    with open(u'twitter_pixels.txt', 'w') as logp1:
        db = SqlConnector(dbname)

        output = db.execute(
            "SELECT images.url, pages.url, images.width, images.height \
                FROM images INNER JOIN pages ON \
                pages.id = images.id_pages INNER JOIN image_domains ON \
                images.id_image_domains = image_domains.id WHERE width<=1 \
                and height<=1 and image_domains.domain = 'twitter.com';")

        for item in output:
            image_url, page_url, width, height = (item[0], item[1], item[2],
                                                  item[3])
            logp1.write("On page {0} w={1} h={2}\n\t {3}\n\n".format(
                page_url, width, height, image_url))
예제 #4
0
def extract_facebook_pixels(dbname):
    """Classify at-most-1x1 facebook.com images by URL pattern and report them.

    Selects images whose width and height are both <= 1 and whose image
    domain is ``'facebook.com'``.  Each image URL is assigned to the first
    pattern whose marker it contains:

    1. ``/tr/?``
    2. ``/tr?``
    3. ``brandlift.php?``
    4. ``offsite_event.php?``
    5. everything else

    For patterns 1-4 the pixel id comes from ``extract_pixel`` (defined
    elsewhere in the project).  The row is logged only the first time a given
    id is seen, and the value ``"null"`` is skipped.  For pattern 5 every row
    is logged; when the URL contains ``spacer.gif?``, the text after that
    marker is counted as the pixel id.

    Each pattern has its own ``fb_pixel_pattern<N>.txt`` file in the current
    directory.  Per-pattern page counts and unique-pixel counts are printed
    and appended to the matching file, and the overall totals are printed.

    Args:
        dbname: Passed to ``SqlConnector`` to choose the database.
    """
    entry = "On page {0} w={1} h={2}\n\t {3}\n\n"
    # Markers for patterns 1-4, tested in this order; the first hit wins.
    markers = ("/tr/?", "/tr?", "brandlift.php?", "offsite_event.php?")
    spacer = "spacer.gif?"
    fallback = len(markers)  # index of pattern 5 (no marker matched)

    # One `with` closes all five reports even if a query or extract_pixel
    # raises part-way through.
    with open(u'fb_pixel_pattern1.txt', 'w') as logp1, \
            open(u'fb_pixel_pattern2.txt', 'w') as logp2, \
            open(u'fb_pixel_pattern3.txt', 'w') as logp3, \
            open(u'fb_pixel_pattern4.txt', 'w') as logp4, \
            open(u'fb_pixel_pattern5.txt', 'w') as logp5:
        logs = (logp1, logp2, logp3, logp4, logp5)

        db = SqlConnector(dbname)

        output = db.execute(
            "SELECT images.url, pages.url, images.width, images.height \
                FROM images INNER JOIN pages ON \
                pages.id = images.id_pages INNER JOIN image_domains ON \
                images.id_image_domains = image_domains.id WHERE width<=1 \
                and height<=1 and image_domains.domain = 'facebook.com';")

        # Sets give O(1) "already seen" checks.  This assumes pixel ids are
        # hashable; they are compared against the string "null" below.
        unique_pixels = [set() for _ in logs]
        page_counts = [0] * len(logs)

        for item in output:
            url = item[0]

            for index, marker in enumerate(markers):
                if marker in url:
                    break
            else:
                index = fallback

            page_counts[index] += 1
            seen = unique_pixels[index]

            if index != fallback:
                fb_pixel = extract_pixel(url)
                if fb_pixel != "null" and fb_pixel not in seen:
                    seen.add(fb_pixel)
                    logs[index].write(
                        entry.format(item[1], item[2], item[3], url))
            else:
                start = url.find(spacer)
                if start != -1:
                    fb_pixel = url[start + len(spacer):]
                    if fb_pixel != "null":
                        seen.add(fb_pixel)
                # Pattern 5 logs every row, not just first sightings.
                logs[index].write(
                    entry.format(item[1], item[2], item[3], url))

        for number, count in enumerate(page_counts, 1):
            message = 'Count of pages with fb pixels [pattern{0}]: {1}'.format(
                number, count)
            print(message)
            logs[number - 1].write(message + '\n')

        for number, pixels in enumerate(unique_pixels, 1):
            message = 'Count of unique fb pixels [pattern{0}]: {1}'.format(
                number, len(pixels))
            print(message)
            logs[number - 1].write(message + '\n')

        print('Total count of uniqie fb pixels: {0}'.format(
            sum(len(pixels) for pixels in unique_pixels)))
        print('Total count of pages with fb pixels: {0}'.format(
            sum(page_counts)))