コード例 #1
0
 def get_page_text(self):
     """Download this object's page text from S3 and decode it as UTF-8.

     Returns None when ``self.s3_page_text`` is falsy; otherwise the
     content stored under ``page_text/<id>`` in the configured bucket.
     """
     if not self.s3_page_text:
         return None
     bucket_name = config.s3_bucket()
     s3_key = "%s/%s" % ('page_text', self.id)
     raw = s3client.download_from_s3_as_string(bucket_name, s3_key)
     return raw.decode('utf-8')
コード例 #2
0
def generate_info_files(s3_info_filename, info_filename, pos_dir, neg_dir):
    """Generate info files for training and testing.

    Reads ``s3_info_filename`` line by line. Each line is split on single
    spaces; fields 1, 2 and 3 are parsed as the integer image label, video
    id and timestamp (field 0 is not used). For every entry whose video
    exists and whose timestamp is listed by ``video.s3_timestamps()``, the
    thumbnail is downloaded from S3 into ``pos_dir`` (label > 0) or
    ``neg_dir`` (otherwise) unless that file already exists locally, and a
    line ``"<local path> <label> <video_id> <timestamp>"`` is appended to
    ``info_filename``.

    :param s3_info_filename: path of the input listing to read.
    :param info_filename: path of the info file to write (truncated first).
    :param pos_dir: directory for positive images; created if missing.
    :param neg_dir: directory for negative images; created if missing.
    :raises ValueError: if a label, video id or timestamp is not an integer.
    :raises IndexError: if a non-blank line has fewer than four fields.
    """
    for directory in (pos_dir, neg_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)
    bucket = affine_config.s3_bucket()
    # Context managers guarantee both files are closed (and the output is
    # flushed) even when parsing or a download raises part-way through.
    with open(s3_info_filename, 'r') as fp, open(info_filename, 'w') as fo:
        for f in fp:
            if not f.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            line = f.split(' ')
            image_label = int(line[1])
            video_id = int(line[2])
            time_stamp = int(line[3])
            video = Video.get(video_id)
            # Skip entries for unknown videos or timestamps not on S3.
            if not video or time_stamp not in video.s3_timestamps():
                continue
            filename = '%012d_%012d.jpg' % (video_id, time_stamp)
            target_dir = pos_dir if image_label > 0 else neg_dir
            outfile = os.path.join(target_dir, filename)
            if not os.path.exists(outfile):
                # Parenthesised single argument prints identically on
                # Python 2 and Python 3.
                print("downloading data from s3")
                # The same configured bucket serves positive and negative
                # images.
                img_path = 'thumbnail/%d/%d' % (video_id, time_stamp)
                s3client.download_from_s3(bucket, img_path, outfile)
            line_item = '%s %i %i %i' % (outfile, image_label, video_id,
                                         time_stamp)
            fo.write(line_item + '\n')
コード例 #3
0
 def upload_screenshot_full(self, path):
     """Upload the full screenshot at ``path`` plus a thumbnail of it to S3.

     The file returned by ``resize_image(path)`` is uploaded to
     ``screenshot_full/<id>_thumb``. The file at ``path`` is then passed to
     ``convert_png_to_jpeg`` as both source and destination (quality 60;
     presumably rewriting it in place) and uploaded to
     ``screenshot_full/<id>``. Both uploads use ``public=True``.
     """
     bucket_name = config.s3_bucket()
     s3_key = "%s/%s" % ('screenshot_full', self.id)

     # The thumbnail is produced before the source file is converted below.
     thumbnail_file = resize_image(path)
     thumbnail_key = s3_key + '_thumb'
     s3client.upload_to_s3(bucket_name, thumbnail_key, thumbnail_file,
                           public=True)

     convert_png_to_jpeg(path, path, quality=60)
     s3client.upload_to_s3(bucket_name, s3_key, path, public=True)
コード例 #4
0
def get_page_text_dict(page_ids, silent=False):
    """
    Retrieves page_text for given page_ids.
    :param page_ids: Iterable of page_ids to retrieve page text for; duplicates are collapsed.
    :param silent: If True, an AttributeError raised while fetching or decoding a page's text is
            suppressed and that page_id keeps the empty string. If False (the default), the
            AttributeError propagates to the caller.
    :return: dictionary with mapping page_id -> page_text (decoded from UTF-8; "" where suppressed).
    :raises AttributeError: when ``silent`` is False and a page's text cannot be fetched.
    """
    page_ids = set(page_ids)
    output = {page_id: "" for page_id in page_ids}
    bucket = config.s3_bucket()
    s3_conn = s3client.connect(bucket)

    for page_id in page_ids:
        urlpath = "%s/%s" % ('page_text', page_id)
        try:
            text = s3_conn.get_key(urlpath).get_contents_as_string()
            output[page_id] = text.decode('utf-8')
        except AttributeError:
            # Presumably get_key() returns None for a key that does not exist,
            # which surfaces here as an AttributeError -- confirm against the
            # object returned by s3client.connect().
            if not silent:
                # Bare raise re-raises with the original traceback intact
                # (``raise e`` would reset it on Python 2).
                raise

    return output
コード例 #5
0
 def upload_favicon(self, path):
     """Upload the file at ``path`` to S3 under ``favicon/<id>``.

     The object is uploaded to the configured bucket with ``public=True``.
     """
     s3_key = "%s/%s" % ('favicon', self.id)
     s3client.upload_to_s3(config.s3_bucket(), s3_key, path, public=True)
コード例 #6
0
 def upload_page_text(self, text):
     """Store ``text`` in S3 under ``page_text/<id>``, encoded as UTF-8.

     The encoded bytes are uploaded to the configured bucket with
     ``public=True``.
     """
     bucket_name = config.s3_bucket()
     s3_key = "%s/%s" % ('page_text', self.id)
     encoded = text.encode('utf-8')
     s3client.upload_to_s3_from_string(bucket_name, s3_key, encoded,
                                       public=True)
コード例 #7
0
 def upload_page_source(self, path):
     """Upload the file at ``path`` to S3 under ``page_source/<id>``.

     The object is uploaded to the configured bucket with ``public=True``.
     """
     s3_key = "%s/%s" % ('page_source', self.id)
     s3client.upload_to_s3(config.s3_bucket(), s3_key, path, public=True)
コード例 #8
0
 def upload_screenshot(self, path):
     """Upload the screenshot at ``path`` and a resized copy of it to S3.

     The original file goes to ``screenshot/<id>``; the file returned by
     ``resize_image(path)`` goes to ``screenshot/<id>_thumb``. Both uploads
     use ``public=True``.
     """
     bucket_name = config.s3_bucket()
     s3_key = "%s/%s" % ('screenshot', self.id)
     s3client.upload_to_s3(bucket_name, s3_key, path, public=True)

     # The full-size file is uploaded first; the resized one follows.
     thumbnail_file = resize_image(path)
     thumbnail_key = s3_key + '_thumb'
     s3client.upload_to_s3(bucket_name, thumbnail_key, thumbnail_file,
                           public=True)
コード例 #9
0
 def s3_favicon_url(self):
     """Return the S3 HTTP URL of this object's favicon.

     Returns None when ``self.s3_favicon`` is falsy.
     """
     if not self.s3_favicon:
         return None
     bucket_name = config.s3_bucket()
     url_parts = (bucket_name, self.id)
     return "http://%s.s3.amazonaws.com/favicon/%s" % url_parts
コード例 #10
0
 def s3_page_text_url(self):
     """Return the S3 HTTP URL of this object's page text.

     Returns None when ``self.s3_page_text`` is falsy.
     """
     if not self.s3_page_text:
         return None
     bucket_name = config.s3_bucket()
     url_parts = (bucket_name, self.id)
     return "http://%s.s3.amazonaws.com/page_text/%s" % url_parts
コード例 #11
0
 def s3_screenshot_full_url(self):
     """Return the S3 HTTP URL of this object's full screenshot.

     Returns None when ``self.s3_screenshot_full`` is falsy.
     """
     if not self.s3_screenshot_full:
         return None
     bucket_name = config.s3_bucket()
     url_parts = (bucket_name, self.id)
     return "http://%s.s3.amazonaws.com/screenshot_full/%s" % url_parts
コード例 #12
0
 def s3_screenshot_url(self, for_new_screenshot=False):
     """Return the S3 HTTP URL of this object's screenshot.

     The URL is built when ``self.s3_screenshot`` is truthy or when
     ``for_new_screenshot`` is set; otherwise None is returned.
     """
     wants_url = self.s3_screenshot or for_new_screenshot
     if not wants_url:
         return None
     bucket_name = config.s3_bucket()
     url_parts = (bucket_name, self.id)
     return "http://%s.s3.amazonaws.com/screenshot/app/%s" % url_parts
コード例 #13
0
 def construct_s3_image_url(video_id, timestamp):
     """Build the S3 HTTP URL of the thumbnail for a video at a timestamp.

     The URL has the form
     ``http://<bucket>.s3.amazonaws.com/thumbnail/<video_id>/<timestamp>``,
     with the bucket taken from ``config.s3_bucket()``.
     """
     url_parts = (config.s3_bucket(), video_id, timestamp)
     return "http://%s.s3.amazonaws.com/thumbnail/%s/%s" % url_parts
コード例 #14
0
 def bucket(self):
     """Return the S3 bucket as reported by ``config.s3_bucket()``."""
     configured_bucket = config.s3_bucket()
     return configured_bucket