Ejemplo n.º 1
0
def main():
    """
    Populate the database from page_meta_data.txt.

    Creates the six fixed categories (each with its icon under STATIC_PATH),
    then creates one Page per record returned by read_in_file. Each record is
    indexed as: item[0] = category name, item[1] = url, item[2] = page title.

    NOTE(review): categories are inserted unconditionally, so running this
    twice presumably creates duplicates (and would then break the
    Category.objects.get lookup below) -- confirm against the model.
    """
    print('Populating the Database from page_meta_data.txt')
    print('************************************************')

    from ifind.models.game_models import Category, Page
    from configuration import MEDIA_ROOT, STATIC_PATH
    from ifind.common.utils import convert_url_to_filename

    tuples_list = read_in_file('page_meta_data.txt')

    # (category name, icon path relative to STATIC_PATH)
    category_icons = (
        ("research", 'imgs/research.jpg'),
        ("about glasgow", 'imgs/about_glasgow.jpg'),
        ("undergraduate", 'imgs/undergraduate.jpg'),
        ("postgraduate", 'imgs/postgraduate.jpg'),
        ("alumni", 'imgs/alumni.png'),
        ("student life", 'imgs/student_life.jpg'),
    )
    for name, icon in category_icons:
        c = Category(name=name,
                     icon=os.path.join(STATIC_PATH, icon),
                     desc=None,
                     is_shown=True)
        c.save()

    for item in tuples_list:
        cat = Category.objects.get(name=item[0])
        # The screenshot file is named after the url, with a .png extension.
        url_file_name = convert_url_to_filename(item[1]) + '.png'
        p = Page(category=cat,
                 title=item[2],
                 is_shown=True,
                 url=item[1],
                 screenshot=os.path.join('/', MEDIA_ROOT, url_file_name))
        p.save()
        # Spaces added around the title so the message is readable.
        print("page with " + item[2] + " has been saved")
Ejemplo n.º 2
0
def populate_pages(url_list, category, halved_screen_shot=False):
    """
    Capture, log and store a Page for every url that is not in the database yet.

    For each url without an existing Page, a screenshot is written to
    DATA_DIR, a "category,url,title,image path" line is appended to
    page_meta_data.txt, and a Page is saved. Urls that raise ValueError
    during this process are reported and skipped.

    :param url_list: a list of the urls for the pages that are going to be populated
    :param category: the category in which the pages fall into
    :param halved_screen_shot: if true, call crop_screen_shot(file, 0, 0, 1000, 1000)
        instead of take_screen_shot(file)
    :return: None
    """
    # 'with' guarantees the file is closed even when an error other than
    # ValueError escapes the loop.
    with open('page_meta_data.txt', 'a') as f:
        for url in url_list:
            existing = Page.objects.filter(url=url)
            pf = existing[0] if existing else None
            if pf:
                print("Already added: {0} ({1})".format(pf.title, pf.url))
                continue

            try:
                # create PageCapture object - specify the browser to be 800 x 600.
                pc = PageCapture(url, 800, 600)
                url_file_name = convert_url_to_filename(url) + '.png'
                image_file_name = os.path.join(DATA_DIR, url_file_name)
                pc.load_url(url)

                # fetch the screen-shot
                if halved_screen_shot:
                    pc.crop_screen_shot(image_file_name, 0, 0, 1000, 1000)
                else:
                    pc.take_screen_shot(image_file_name)

                title = pc.get_page_title()

                # Record the page in the text file as well, so the database
                # can later be populated from the file alone.
                # NOTE(review): fields are comma-joined without quoting; a
                # title containing a comma will add extra columns.
                f.write('%s,%s,%s,%s\n' % (category.name, url, title, image_file_name,))
                print("written {0} to file.".format(title))

                # The stored screenshot path is built from MEDIA_ROOT rather
                # than DATA_DIR (original note: DATA_DIR uses the current
                # working directory in the url).
                p = Page(category=category,
                         title=title,
                         is_shown=True,
                         url=url,
                         screenshot=os.path.join('/', MEDIA_ROOT, url_file_name))
                p.save()
                print('Page title= ' + p.title + ' has been saved!')

            except ValueError as err:
                print('Page  has ((NOT)) been saved!')
                # Report the actual error, not just the exception type name.
                print('ERROR IS {0}'.format(err))
                continue
Ejemplo n.º 3
0
def populate_pages(url_list, category, halved_screen_shot=False):
    """
    Capture a screenshot for every url and log its metadata to page_meta_data.txt.

    For each url a screenshot is written to DATA_DIR and a
    "category,url,title,image path" line is appended to page_meta_data.txt.
    Nothing is written to the database here. Urls that raise ValueError are
    reported and skipped.

    :param url_list: a list of the urls for the pages that are going to be populated
    :param category: the category in which the pages fall into
    :param halved_screen_shot: if true, call crop_screen_shot(file, 0, 0, 1000, 1000)
        instead of take_screen_shot(file)
    :return: None
    """
    # 'with' guarantees the file is closed even when an error other than
    # ValueError escapes the loop.
    with open('page_meta_data.txt', 'a') as f:
        for url in url_list:
            try:
                # create PageCapture object - specify the browser to be 800 x 600.
                pc = PageCapture(url, 800, 600)
                url_file_name = convert_url_to_filename(url) + '.png'
                image_file_name = os.path.join(DATA_DIR, url_file_name)
                pc.load_url(url)

                # fetch the screen-shot
                if halved_screen_shot:
                    pc.crop_screen_shot(image_file_name, 0, 0, 1000, 1000)
                else:
                    pc.take_screen_shot(image_file_name)

                title = pc.get_page_title()

                # save to file instead of db here to decouple.
                # NOTE(review): fields are comma-joined without quoting; a
                # title containing a comma will add extra columns.
                f.write('%s,%s,%s,%s\n' % (category.name, url, title, image_file_name,))
                print("written {0} to file.".format(title))

            except ValueError as err:
                print('Page  has ((NOT)) been saved!')
                # Report the actual error, not just the exception type name.
                print('ERROR IS {0}'.format(err))
                continue
Ejemplo n.º 4
0
def populate_pages(url_list, category, halved_screen_shot=False):
    """
    Capture a screenshot for every url and save a Page for it.

    For each url a screenshot is written to DATA_DIR and a Page with the
    captured title is saved under the given category. Urls that raise
    ValueError are reported and skipped.

    :param url_list: a list of the urls for the pages that are going to be populated
    :param category: the category in which the pages fall into
    :param halved_screen_shot: if true, crop the capture instead of taking a
        full screenshot; one of two crop rectangles is picked at random
    :return: None
    """
    for url in url_list:
        try:
            # create PageCapture object - specify the browser to be 800 x 600.
            pc = PageCapture(url, 800, 600)
            url_file_name = convert_url_to_filename(url) + '.png'
            image_file_name = os.path.join(DATA_DIR, url_file_name)
            pc.load_url(url)

            # fetch the screen-shot
            if halved_screen_shot:
                # Roughly half the pages get the (200, 400, 700, 900) crop,
                # the rest get the (0, 0, 1000, 1000) crop.
                if random.random() > 0.5:
                    pc.crop_screen_shot(image_file_name, 200, 400, 700, 900)
                else:
                    pc.crop_screen_shot(image_file_name, 0, 0, 1000, 1000)
            else:
                pc.take_screen_shot(image_file_name)

            title = pc.get_page_title()

            # The stored screenshot path is built from MEDIA_ROOT rather than
            # DATA_DIR (original note: DATA_DIR uses the current working
            # directory in the url).
            p = Page(category=category,
                     title=title,
                     is_shown=True,
                     url=url,
                     screenshot=os.path.join('/', MEDIA_ROOT, url_file_name))
            p.save()
            print('Page title= ' + p.title + ' has been saved!')
        except ValueError as err:
            print('Page  has ((NOT)) been saved!')
            print('ERROR IS')
            # Print the caught exception itself; the class object carries no
            # information about what went wrong.
            print(err)
            continue
Ejemplo n.º 5
0
def populate_pages(url_list, category, halved_screen_shot=False):
    """
    Take a screenshot of each url and store it as a Page in the given category.

    The screenshot file is written under DATA_DIR; the Page records the page
    title and a screenshot path under MEDIA_ROOT. A ValueError raised while
    handling a url is reported and that url is skipped.

    :param url_list: a list of the urls for the pages that are going to be populated
    :param category: the category in which the pages fall into
    :param halved_screen_shot: if true, save a cropped capture (crop rectangle
        chosen at random between two fixed options) instead of a full screenshot
    :return: None
    """
    for url in url_list:
        try:
            # create PageCapture object - specify the browser to be 800 x 600.
            pc = PageCapture(url, 800, 600)
            url_file_name = convert_url_to_filename(url) + '.png'
            image_file_name = os.path.join(DATA_DIR, url_file_name)
            pc.load_url(url)

            # fetch the screen-shot
            if not halved_screen_shot:
                pc.take_screen_shot(image_file_name)
            elif random.random() > 0.5:
                pc.crop_screen_shot(image_file_name, 200, 400, 700, 900)
            else:
                pc.crop_screen_shot(image_file_name, 0, 0, 1000, 1000)

            title = pc.get_page_title()

            # MEDIA_ROOT is used for the stored path instead of DATA_DIR
            # (original note: DATA_DIR uses the current working directory in
            # the url).
            screenshot_path = os.path.join('/', MEDIA_ROOT, url_file_name)
            p = Page(category=category, title=title, is_shown=True, url=url,
                     screenshot=screenshot_path)
            p.save()
            print('Page title= ' + p.title + ' has been saved!')
        except ValueError as err:
            print('Page  has ((NOT)) been saved!')
            print('ERROR IS')
            # Show the actual exception rather than the ValueError class.
            print(err)
            continue