def main():
    """
    Populate the database from page_meta_data.txt.

    Ensures the fixed set of categories exists, then creates one Page for
    every entry returned by read_in_file('page_meta_data.txt'). Each entry is
    indexed as item[0] = category name, item[1] = url, item[2] = page title.

    Categories are fetched with get_or_create so that running the script a
    second time does not insert duplicate rows (duplicates would make the
    later Category.objects.get(name=...) lookup fail).

    :return: None
    """
    print('Populating the Database from page_meta_data.txt')
    print('************************************************')
    from ifind.models.game_models import Category, Page
    from configuration import MEDIA_ROOT, STATIC_PATH
    from ifind.common.utils import convert_url_to_filename

    tuples_list = read_in_file('page_meta_data.txt')

    # (category name, icon path relative to STATIC_PATH)
    category_icons = (
        ("research", 'imgs/research.jpg'),
        ("about glasgow", 'imgs/about_glasgow.jpg'),
        ("undergraduate", 'imgs/undergraduate.jpg'),
        ("postgraduate", 'imgs/postgraduate.jpg'),
        ("alumni", 'imgs/alumni.png'),
        ("student life", 'imgs/student_life.jpg'),
    )

    categories = {}
    for name, icon in category_icons:
        category, _created = Category.objects.get_or_create(
            name=name,
            defaults={
                'icon': os.path.join(STATIC_PATH, icon),
                'desc': None,
                'is_shown': True,
            })
        categories[name] = category

    for item in tuples_list:
        cat = categories.get(item[0])
        if cat is None:
            # Not one of the categories ensured above: look it up in the
            # database, exactly as before.
            cat = Category.objects.get(name=item[0])
        url_file_name = convert_url_to_filename(item[1]) + '.png'
        p = Page(category=cat, title=item[2], is_shown=True, url=item[1],
                 screenshot=os.path.join('/', MEDIA_ROOT, url_file_name))
        p.save()
        print("page with " + item[2] + " has been saved")
def populate_pages(url_list, category, halved_screen_shot=False):
    """
    Capture a screen-shot for every url that has no Page yet, append its
    details to page_meta_data.txt and save it as a Page.

    Urls for which a Page with the same url already exists are skipped. A
    ValueError raised while handling a url is reported and that url is
    skipped; the remaining urls are still processed.

    :param url_list: a list of the urls for the pages that are going to be populated
    :param category: the category in which the pages fall into
    :param halved_screen_shot: when true the capture is written with
        crop_screen_shot(image_file_name, 0, 0, 1000, 1000) instead of
        take_screen_shot(image_file_name)
    :return: None
    """
    # The context manager closes the metadata file even when an error other
    # than ValueError escapes the loop.
    with open('page_meta_data.txt', 'a') as meta_file:
        for url in url_list:
            existing = Page.objects.filter(url=url)
            if existing:
                known = existing[0]
                print("Already added: {0} ({1})".format(known.title, known.url))
                continue

            try:
                # create PageCapture object - specify the browser to be 800 x 600.
                pc = PageCapture(url, 800, 600)
                url_file_name = convert_url_to_filename(url) + '.png'
                image_file_name = os.path.join(DATA_DIR, url_file_name)
                pc.load_url(url)

                # fetch the screen-shot
                if halved_screen_shot:
                    pc.crop_screen_shot(image_file_name, 0, 0, 1000, 1000)
                else:
                    pc.take_screen_shot(image_file_name)

                # get the title
                title = pc.get_page_title()

                # Record the capture in the metadata file as well as the db.
                meta_file.write('%s,%s,%s,%s\n' % (category.name, url, title, image_file_name,))
                print("written {0} to file.".format(title))

                # NOTE: the stored screenshot path is built from MEDIA_ROOT
                # rather than DATA_DIR; per the original author, DATA_DIR put
                # the current working directory into the url.
                p = Page(category=category, title=title, is_shown=True, url=url,
                         screenshot=os.path.join('/', MEDIA_ROOT, url_file_name))
                p.save()
                print('Page title= ' + p.title + ' has been saved!')
            except ValueError as err:
                print('Page has ((NOT)) been saved!')
                # Report the actual error, not just the exception type name.
                print('ERROR IS {0}'.format(err))
def populate_pages(url_list, category, halved_screen_shot=False):
    """
    Capture a screen-shot for every url and append its details to
    page_meta_data.txt.

    One line of the form "category name,url,title,image file name" is
    appended per captured page. No Page is saved here. A ValueError raised
    while handling a url is reported and that url is skipped; the remaining
    urls are still processed.

    :param url_list: a list of the urls for the pages that are going to be populated
    :param category: the category in which the pages fall into
    :param halved_screen_shot: when true the capture is written with
        crop_screen_shot(image_file_name, 0, 0, 1000, 1000) instead of
        take_screen_shot(image_file_name)
    :return: None
    """
    # The context manager closes the metadata file even when an error other
    # than ValueError escapes the loop.
    with open('page_meta_data.txt', 'a') as meta_file:
        for url in url_list:
            try:
                # create PageCapture object - specify the browser to be 800 x 600.
                pc = PageCapture(url, 800, 600)
                image_file_name = os.path.join(
                    DATA_DIR, convert_url_to_filename(url) + '.png')
                pc.load_url(url)

                # fetch the screen-shot
                if halved_screen_shot:
                    pc.crop_screen_shot(image_file_name, 0, 0, 1000, 1000)
                else:
                    pc.take_screen_shot(image_file_name)

                # get the title
                title = pc.get_page_title()

                # Save to file instead of the db here to decouple capturing
                # from database population.
                meta_file.write('%s,%s,%s,%s\n' % (category.name, url, title, image_file_name,))
                print("written {0} to file.".format(title))
            except ValueError as err:
                print('Page has ((NOT)) been saved!')
                # Report the actual error, not just the exception type name.
                print('ERROR IS {0}'.format(err))
def populate_pages(url_list, category, halved_screen_shot=False):
    """
    Capture a screen-shot for every url and save it as a Page.

    A ValueError raised while handling a url is reported and that url is
    skipped; the remaining urls are still processed.

    :param url_list: a list of the urls for the pages that are going to be populated
    :param category: the category in which the pages fall into
    :param halved_screen_shot: when true the capture is written with
        crop_screen_shot, using one of two fixed argument sets picked at
        random, instead of take_screen_shot
    :return: None
    """
    for url in url_list:
        try:
            # create PageCapture object - specify the browser to be 800 x 600.
            pc = PageCapture(url, 800, 600)
            url_file_name = convert_url_to_filename(url) + '.png'
            image_file_name = os.path.join(DATA_DIR, url_file_name)
            pc.load_url(url)

            # fetch the screen-shot
            if halved_screen_shot:
                # Each crop variant is chosen with equal probability.
                if random.random() > 0.5:
                    pc.crop_screen_shot(image_file_name, 200, 400, 700, 900)
                else:
                    pc.crop_screen_shot(image_file_name, 0, 0, 1000, 1000)
            else:
                pc.take_screen_shot(image_file_name)

            # get the title
            title = pc.get_page_title()

            # NOTE: the stored screenshot path is built from MEDIA_ROOT rather
            # than DATA_DIR; per the original author, DATA_DIR put the current
            # working directory into the url.
            p = Page(category=category, title=title, is_shown=True, url=url,
                     screenshot=os.path.join('/', MEDIA_ROOT, url_file_name))
            p.save()
            print('Page title= ' + p.title + ' has been saved!')
        except ValueError as err:
            print('Page has ((NOT)) been saved!')
            # Report the caught error itself rather than the ValueError class.
            print('ERROR IS {0}'.format(err))
def populate_pages(url_list, category, halved_screen_shot=False):
    """
    Capture a screen-shot for every url and save it as a Page.

    A ValueError raised while handling a url is reported and that url is
    skipped; the remaining urls are still processed.

    :param url_list: a list of the urls for the pages that are going to be populated
    :param category: the category in which the pages fall into
    :param halved_screen_shot: when true the capture is written with
        crop_screen_shot, using one of two fixed argument sets picked at
        random, instead of take_screen_shot
    :return: None
    """
    for url in url_list:
        try:
            # create PageCapture object - specify the browser to be 800 x 600.
            pc = PageCapture(url, 800, 600)
            url_file_name = convert_url_to_filename(url) + '.png'
            image_file_name = os.path.join(DATA_DIR, url_file_name)
            pc.load_url(url)

            # fetch the screen-shot
            if not halved_screen_shot:
                pc.take_screen_shot(image_file_name)
            else:
                # Pick one of the two crop argument sets with equal probability.
                if random.random() > 0.5:
                    crop_args = (200, 400, 700, 900)
                else:
                    crop_args = (0, 0, 1000, 1000)
                pc.crop_screen_shot(image_file_name, *crop_args)

            # get the title
            title = pc.get_page_title()

            # NOTE: the stored screenshot path is built from MEDIA_ROOT rather
            # than DATA_DIR; per the original author, DATA_DIR put the current
            # working directory into the url.
            screenshot_path = os.path.join('/', MEDIA_ROOT, url_file_name)
            p = Page(category=category, title=title, is_shown=True, url=url,
                     screenshot=screenshot_path)
            p.save()
            print('Page title= ' + p.title + ' has been saved!')
        except ValueError as err:
            print('Page has ((NOT)) been saved!')
            # Report the caught error itself rather than the ValueError class.
            print('ERROR IS {0}'.format(err))