def testMergeFiles(self):
        """Test merging multiple files."""
        input_data = [(str(i), "_" + str(i)) for i in range(100)]
        input_data.sort()

        bucket_name = "testbucket"
        test_filename = "testfile"
        full_filename = "/%s/%s" % (bucket_name, test_filename)

        with cloudstorage.open(full_filename, mode="w") as f:
            with records.RecordsWriter(f) as w:
                for (k, v) in input_data:
                    proto = kv_pb.KeyValue()
                    proto.set_key(k)
                    proto.set_value(v)
                    w.write(proto.Encode())

        p = TestMergePipeline(bucket_name, [full_filename, full_filename, full_filename])
        p.start()
        test_support.execute_until_empty(self.taskqueue)
        p = TestMergePipeline.from_id(p.pipeline_id)

        output_file = p.outputs.default.value[0]
        output_data = []
        with cloudstorage.open(output_file) as f:
            for record in records.RecordsReader(f):
                output_data.append(record)

        expected_data = [str((k, [v, v, v], False)) for (k, v) in input_data]
        self.assertEquals(expected_data, output_data)
        self.assertEquals(1, len(self.emails))
Example #2
def process_images():
    if request.method == 'POST':
        currentFileName = request.form['filename']
        newFormat = request.form['format']
        encodingParameter = images.JPEG
        if newFormat == 'png':
            encodingParameter = images.PNG
        elif newFormat == 'gif':
            encodingParameter = images.GIF
        elif newFormat == 'bmp':
            encodingParameter = images.BMP
        # Replace the original extension with the requested format.
        newFileName = '.'.join(currentFileName.split('.')[:-1] + [newFormat])

        f = gcs.open( BUCKET_PATH + currentFileName)
        img = images.Image(image_data=f.read())
        f.close()

        try:
            newWidth = int(request.form['width'])
        except (KeyError, ValueError):
            # Fall back to the image's own width if none (or an invalid one) was supplied.
            newWidth = img.width
        try:
            newHeight = int(request.form['height'])
        except (KeyError, ValueError):
            # Fall back to the image's own height if none (or an invalid one) was supplied.
            newHeight = img.height

        img.resize(width= newWidth, height=newHeight, allow_stretch=True )
        processedImage = img.execute_transforms(output_encoding=encodingParameter)
        f = gcs.open(BUCKET_PATH + newFileName, "w")
        f.write(processedImage)
        f.close()
        return final_upload(newFileName)
Example #3
    def __getBlob(cls, _key):
        """ Private method that gets blob content
            @param _key: String key of the blob.
            @return: tuple (value, ttl)
        """

        if _key is not None:

            _clau = cls.__getBlobkey(_key)
            if _clau is not None:
                br = gcs.open(cls.__bucket_name + _key)
                value = br.read()
                br.close()

                return cls.__checkIfExpired(value)
            else:
                z = 0
                _clau = cls.__getBlobkey(_key + "_" + str(z))
                s = []
                while _clau is not None:
                    br = gcs.open(cls.__bucket_name + _clau)
                    s.append(br.read())
                    br.close()

                    z += 1
                    _clau = cls.__getBlobkey(_key + "_" + str(z))

                if z > 0:
                    value = "".join(s)
                    return cls.__checkIfExpired(value)
                else:
                    return (None, None)
        else:
            return (None, None)
Example #4
	def run(self):

		#INSERT OPERATION (in cache)
		for name in self.filenames:
			if((filesizes[name]<=100000) and self.use_cache): #If the size of the file is < 100 kb and memcache option is on
			   name_gcs = bucket+'/'+ name.encode()
			   gcs_file = gcs.open(name_gcs, 'r')
			   value = gcs_file.read()
			   insertCache(name, value)
			
		#RETRIEVE OPERATION
		start_retrieve = datetime.now()
		fileaccess = numpy.random.random_integers(0,len(self.filenames)-1,2*len(self.filenames)) #Generating 2 random accesses per file following uniform distribution	
		for i in range(len(fileaccess)):	
			key = self.filenames[fileaccess[i]]
			filename = bucket + '/' + key.encode()
			value = findCache(key)
			if (value is None):
				if check(filename):
					with gcs.open(filename, 'r') as f:
						for line in f:
							value = f.readline()
		RETRIEVE_TIME.append((datetime.now() - start_retrieve).total_seconds())
			
		#REMOVE OPERATION
		start_remove = datetime.now()
		for name in self.filenames:
			remove(name)
		REMOVE_TIME.append((datetime.now() - start_remove).total_seconds())
def Check(EmployeeDataFilePath,emailID,fallOutReport):
	FileName = EmployeeDataFilePath.split('/')[-1].split(".xlsx")[0]
	EmployeeDataFilePathTsv = XlsxToTsv(EmployeeDataFilePath)
	FieldId,Employees,TotalEmployee = EmployeeData(EmployeeDataFilePathTsv)
	Errors = ErrorList(Employees,TotalEmployee,ProductionTemplateFileName)
	if len(Errors) == 0 :
		if fallOutReport == 1:
			FileName = FallOutReportXlsx(FieldId,Employees,TotalEmployee,ProductionTemplateFileName,FileName)
			Message = open("SuccessEmailBody.txt").read()
			Subject = "Success Factor Upload FallOut Report"
			mail.send_mail(sender=sender_email_id.format(
                	app_identity.get_application_id()),
                	to=emailID,
                	subject=Subject,
                	body=Message,attachments=[(FileName, gcs.open(FileName).read())])
			gcs.delete(FileName)
	else:
		FileName = XlsxErrorReport(Errors,FileName)
		Message = open("ErrorEmailBody.txt").read()
		Subject = "Success Factor Upload File Error"
		mail.send_mail(sender=sender_email_id.format(
                app_identity.get_application_id()),
                to=emailID,
                subject=Subject,
                body=Message,attachments=[(FileName, gcs.open(FileName).read())])
		gcs.delete(FileName)
	if len(Errors) == 0:
		return 1
	else:
		return 0
    def testSortFile(self):
        """Test sorting a file."""
        bucket_name = "testbucket"
        test_filename = "testfile"
        full_filename = "/%s/%s" % (bucket_name, test_filename)

        input_data = [(str(i), "_" + str(i)) for i in range(100)]

        with cloudstorage.open(full_filename, mode="w") as f:
            with records.RecordsWriter(f) as w:
                for (k, v) in input_data:
                    proto = kv_pb.KeyValue()
                    proto.set_key(k)
                    proto.set_value(v)
                    w.write(proto.Encode())

        p = shuffler._SortChunksPipeline("testjob", bucket_name, [[full_filename]])
        p.start()
        test_support.execute_until_empty(self.taskqueue)
        p = shuffler._SortChunksPipeline.from_id(p.pipeline_id)

        input_data.sort()
        output_files = p.outputs.default.value[0]
        output_data = []
        for output_file in output_files:
            with cloudstorage.open(output_file) as f:
                for binary_record in records.RecordsReader(f):
                    proto = kv_pb.KeyValue()
                    proto.ParseFromString(binary_record)
                    output_data.append((proto.key(), proto.value()))

        self.assertEquals(input_data, output_data)
        self.assertEquals(1, len(self.emails))
Example #7
def maybe_process_image(image_url, base_name):
  if CLOUD_STORAGE_ROOT_URL in image_url:
    return (image_url, None)

  image_result = urlfetch.fetch(image_url)
  if image_result.status_code < 200 or image_result.status_code >= 300:
    raise IOError('Error downloading image: HTTP %d.' % image_result.status_code)

  base_filename = re.sub(r'[^\w]+', '-', base_name.strip().lower())

  # main image
  image_gcs_path = '/muzeifeaturedart/' + base_filename + '.jpg'
  # upload with default ACLs set on the bucket  # or use options={'x-goog-acl': 'public-read'})
  gcs_file = gcs.open(image_gcs_path, 'w', content_type='image/jpeg')
  gcs_file.write(image_result.content)
  gcs_file.close()

  # thumb
  thumb_gcs_path = '/muzeifeaturedart/' + base_filename + '_thumb.jpg'
  thumb = images.Image(image_result.content)
  thumb.resize(width=(thumb.width * 600 / thumb.height), height=600)
  thumb_contents = thumb.execute_transforms(output_encoding=images.JPEG, quality=40)
  gcs_file = gcs.open(thumb_gcs_path, 'w', content_type='image/jpeg')
  gcs_file.write(thumb_contents)
  gcs_file.close()

  return (CLOUD_STORAGE_ROOT_URL + image_gcs_path,
          CLOUD_STORAGE_ROOT_URL + thumb_gcs_path)
Example #8
    def finish(self):
        """
        Called when all shards have finished processing
        """
        if self.get_meta().generate_error_csv:
            self.error_csv_filename = self._error_csv_filename()

            with cloudstorage.open(self.error_csv_filename, 'w') as f:
                # Concat all error csvs from shards into 1 file
                has_written = False
                for shard in ImportShard.objects.filter(task_id=self.pk, task_model_path=self.model_path):
                    if not shard.error_csv_filename:
                        continue

                    # If this is the first row, write the column headers
                    if not has_written:
                        data = json.loads(shard.source_data_json)[0]
                        cols = getattr(self, "detected_columns", sorted(data.keys())) + [ "errors" ]
                        csvwriter = csv.writer(f)
                        csvwriter.writerow(cols)
                        has_written = True

                    # Write the shard's error file into the master file
                    f.write(cloudstorage.open(shard.error_csv_filename).read())
                    cloudstorage.delete(shard.error_csv_filename)

            if has_written:
                # Create a blobstore key for the GCS file
                blob_key = create_gs_key('/gs%s' % self.error_csv_filename)
                self.error_csv = '%s/errors.csv' % blob_key
                self.save()
            else:
                cloudstorage.delete(self.error_csv_filename)
    def post(self):
        # get args
        self.start_cursor = self.request.get('cursor')
        self.filtering_event_key = self.request.get('event')
        self.filename = self.request.get('filename')
        self.csv_header = self.request.get('csv_header')
        self.worker_url = self.request.get('worker_url')

        self.event = Event.get(self.filtering_event_key) if self.filtering_event_key else None

        # get (base) query, skip query to cursor, filter for sites
        query = self.get_base_query()
        if self.start_cursor:
            query.with_cursor(self.start_cursor)
        fetched_sites = query.fetch(limit=self.sites_per_task)
        sites = self.filter_sites(fetched_sites)

        # write part of csv file to GCS
        csv_part_gcs_fd = cloudstorage.open(
            BUCKET_NAME + '/' + self.filename + '.part.' + self.start_cursor,
            'w',
            content_type='text/csv'
        )
        self._write_csv_rows(csv_part_gcs_fd, sites)
        csv_part_gcs_fd.close()

        # decide what to do next
        self.end_cursor = query.cursor()
        if self.end_cursor and self.start_cursor != self.end_cursor:
            # chain to next task
            taskqueue.add(
                url=self.worker_url,
                params=self.get_continuation_param_dict(),
                retry_options=taskqueue.TaskRetryOptions(task_retry_limit=3),
            )
        else:
            # finish file: combine parts and deduplicate lines
            logging.info(u"Deduplicating to create %s ..." % self.filename)

            sio = StringIO()
            path_prefix = BUCKET_NAME + '/' + self.filename + '.part'
            for gcs_file_stat in cloudstorage.listbucket(path_prefix):
                csv_part_gcs_fd = cloudstorage.open(gcs_file_stat.filename)
                for line in csv_part_gcs_fd:
                    sio.write(line)
                csv_part_gcs_fd.close()
            sio.seek(0)
            deduplicated_lines = set(line for line in sio)

            # write csv header and deduplicated lines to new file
            csv_complete_gcs_fd = cloudstorage.open(
                BUCKET_NAME + '/' + self.filename,
                'w',
                content_type='text/csv'
            )
            csv_complete_gcs_fd.write(self.csv_header.encode('utf-8'))
            for line in deduplicated_lines:
                csv_complete_gcs_fd.write(line)
            csv_complete_gcs_fd.close()
    def get(self, event_key):
        event = Event.get_by_id(event_key)

        event.prepAwardsMatchesTeams()

        if event.awards:
            with cloudstorage.open(
                self.AWARDS_FILENAME_PATTERN.format(event.year, event_key, event_key), "w"
            ) as awards_file:
                writer = csv.writer(awards_file, delimiter=",")
                for award in event.awards:
                    for recipient in award.recipient_list:
                        team = recipient["team_number"]
                        if type(team) == int:
                            team = "frc{}".format(team)
                        self._writerow_unicode(writer, [award.key.id(), award.name_str, team, recipient["awardee"]])

        if event.matches:
            with cloudstorage.open(
                self.MATCHES_FILENAME_PATTERN.format(event.year, event_key, event_key), "w"
            ) as matches_file:
                writer = csv.writer(matches_file, delimiter=",")
                for match in event.matches:
                    red_score = match.alliances["red"]["score"]
                    blue_score = match.alliances["blue"]["score"]
                    self._writerow_unicode(
                        writer,
                        [match.key.id()]
                        + match.alliances["red"]["teams"]
                        + match.alliances["blue"]["teams"]
                        + [red_score, blue_score],
                    )

        if event.teams:
            with cloudstorage.open(
                self.TEAMS_FILENAME_PATTERN.format(event.year, event_key, event_key), "w"
            ) as teams_file:
                writer = csv.writer(teams_file, delimiter=",")
                self._writerow_unicode(writer, [team.key.id() for team in event.teams])

        if event.rankings:
            with cloudstorage.open(
                self.RANKINGS_FILENAME_PATTERN.format(event.year, event_key, event_key), "w"
            ) as rankings_file:
                writer = csv.writer(rankings_file, delimiter=",")
                for row in event.rankings:
                    self._writerow_unicode(writer, row)

        if event.alliance_selections:
            with cloudstorage.open(
                self.ALLIANCES_FILENAME_PATTERN.format(event.year, event_key, event_key), "w"
            ) as alliances_file:
                writer = csv.writer(alliances_file, delimiter=",")
                for alliance in event.alliance_selections:
                    self._writerow_unicode(writer, alliance["picks"])

        self.response.out.write("Done backing up {}!".format(event_key))
  def testReadEmptyFile(self):
    f = cloudstorage.open(TESTFILE, 'w')
    f.write('')
    f.close()

    f = cloudstorage.open(TESTFILE)
    self.assertEqual('', f.read())
    self.assertEqual('', f.read())
    f.close()
  def testFilenameEscaping(self):
    name = BUCKET + '/a b/c d/*%$'
    with cloudstorage.open(name, 'w') as f:
      f.write('foo')
    with cloudstorage.open(name) as f:
      self.assertEqual('foo', f.read())
    self.assertEqual(name, cloudstorage.stat(name).filename)
    bucket = cloudstorage.listbucket(BUCKET)
    for stat in bucket:
      self.assertEqual(name, stat.filename)
    cloudstorage.delete(name)
  def testReadSmall(self):
    f = cloudstorage.open(TESTFILE, 'w')
    f.write('abcdefghij')
    f.close()

    f = cloudstorage.open(TESTFILE, read_buffer_size=3)
    self.assertEqual('ab', f.read(2))
    self.assertEqual('c', f.read(1))
    self.assertEqual('de', f.read(2))
    self.assertEqual('fghij', f.read())
    f.close()
  def testWriteInBlockSize(self):
    f = cloudstorage.open(TESTFILE, 'w')
    f.write('a'*256*1024)
    f.write('b'*256*1024)
    f.close()

    f = cloudstorage.open(TESTFILE)
    self.assertEqual('a'*256*1024 + 'b'*256*1024, f.read())
    self.assertEqual('', f.read())
    self.assertEqual('', f.readline())
    f.close()
  def testFlush2(self):
    blocksize = 0
    with cloudstorage.open(TESTFILE, 'w') as f:
      blocksize = f._blocksize
      f.write('a'*(blocksize+1))
      f.write('a')
      f.write('a'*(blocksize-1))
      f.flush()
      self.assertEqual(1, f._buffered)

    with cloudstorage.open(TESTFILE) as f:
      self.assertEqual(blocksize*2+1, len(f.read()))
  def testReadEntireFile(self):
    f = cloudstorage.open(TESTFILE, 'w')
    f.write('abcde')
    f.close()

    f = cloudstorage.open(TESTFILE, read_buffer_size=1)
    self.assertEqual('abcde', f.read())
    f.close()

    f = cloudstorage.open(TESTFILE)
    self.assertEqual('abcde', f.read(8))
    f.close()
def _create_zip(result_path, human_user_email, app_id, data_export_email):
    zip_path = os.path.join(result_path, 'result.zip')
    with cloudstorage.open(zip_path, 'w') as zip_stream:
        with ZipFile(zip_stream, 'w', allowZip64=True) as bzf:
            for f in cloudstorage.listbucket(os.path.join(result_path, 'result')):
                with cloudstorage.open(f.filename, 'r') as file_stream:
                    filename = f.filename.replace('%s/result/' % result_path, '')
                    # writestr() adds a new archive member on every call, so
                    # write each source file as a single entry instead of per chunk.
                    bzf.writestr(filename, file_stream.read())
    download_url = get_serving_url(zip_path)
    deferred.defer(_send_export_email, result_path, human_user_email, app_id, data_export_email, download_url,
                   _queue=DATA_EXPORT_QUEUE)
Example #18
    def finish(self, reportDone=True):
        """Called when the worker has finished, to allow for any final work to be done."""
        progress = None
        if reportDone:
            if self.report.ftype == REPORT.XLS:
                self.gcs_file.close()
                readable_gcs_file = gcs.open(self.gcs_file.name, 'r')
                data = readable_gcs_file.read().split("\n")
                readable_gcs_file.close()
                self.gcs_file = gcs.open(self.gcs_file.name, 'w')
                y = 0
                for r in data:
                    if not r:
                        continue
                    if y > REPORT.XLS_ROW_LIMIT:
                        logging.warning("Excel report exceeded row limit and was truncated")
                        break
                    y += 1
                    row = []
                    try:
                        row = json.loads(r)
                    except Exception, ex:
                        logging.error("Unable to json load row: %s (%s)" % (r, ex))
                    else:
                        for x, cell in enumerate(row):
                            if cell:
                                if x in self.report.date_columns:
                                    self.ws.write(y, x, cell, self.xls_styles['datetime'])
                                else:
                                    self.ws.write(y, x, cell)            
                        if self.make_sub_reports:
                            #TODO: Write section_work_sheet, survey to excel is not enabled for now though
                            pass
                self.wb.save(self.gcs_file)

            self.gcs_file.close()            
            if self.has_section_files():
                for section_gcs_file in self.section_gcs_files:
                    section_gcs_file.close()

            self.report.status = REPORT.DONE
            self.report.dt_generated = datetime.now()
            self.report.put()
            duration = self.report.getDuration()
            logging.debug("GCSReportWorker finished. Counters: %s. Report ran for %d seconds." % (self.counters, duration))
            progress = {
                "status": REPORT.DONE,
                "resource":self.report.getGCSFile(),
                "generated": tools.unixtime(dt=self.report.dt_generated),
                "report": self.report.json(),
                "duration": duration
            }
Example #19
    def finish(self):
        """
        Called when all shards have finished processing
        """
        # Refresh object
        self = self._meta.model.objects.get(pk=self.pk)

        # If this was called before, don't do anything
        if self.status == ImportStatus.FINISHED:
            return

        if self.get_meta().generate_error_csv:
            self.error_csv_filename = self._error_csv_filename()

            with cloudstorage.open(self.error_csv_filename, 'w') as f:
                # Concat all error csvs from shards into 1 file
                has_written = False

                shards = self.get_shard_model().objects.filter(task_id=self.pk, task_model_path=self.model_path)

                # The shards haven't necessarily finished writing their error files when this is called,
                # because that happens in a defer. So we redefer this until they're all done.
                if [shard for shard in shards if not shard.error_csv_written]:
                    self.defer(self.finish)
                    return

                for shard in shards:
                    if not shard.error_csv_filename:
                        continue

                    # If this is the first row, write the column headers
                    if not has_written:
                        data = json.loads(shard.source_data_json)[0]
                        cols = getattr(self, "detected_columns", data.keys()) + [ "errors" ]
                        csvwriter = csv.writer(f)
                        csvwriter.writerow(cols)
                        has_written = True

                    # Write the shard's error file into the master file
                    f.write(cloudstorage.open(shard.error_csv_filename).read())
                    cloudstorage.delete(shard.error_csv_filename)

            if has_written:
                # Create a blobstore key for the GCS file
                blob_key = create_gs_key('/gs%s' % self.error_csv_filename)
                self.error_csv = '%s/errors.csv' % blob_key
            else:
                cloudstorage.delete(self.error_csv_filename)

        self.status = ImportStatus.FINISHED
        self.save()
Example #20
def maybe_process_image(image_url, crop_tuple, base_name):
  if CLOUD_STORAGE_ROOT_URL in image_url and crop_tuple == NO_CROP_TUPLE:
    return (image_url, None)

  image_result = urlfetch.fetch(image_url)
  if image_result.status_code < 200 or image_result.status_code >= 300:
    raise IOError('Error downloading image: HTTP %d.' % image_result.status_code)

  filename = re.sub(r'[^\w]+', '-', base_name.strip().lower()) + '.jpg'

  # main image
  image_gcs_path = CLOUD_STORAGE_BASE_PATH + '/fullres/' + filename
  # resize to max width 4000 or max height 2000
  image_contents = image_result.content
  image = images.Image(image_contents)
  edited = False
  if image.height > 2000:
    image.resize(width=(image.width * 2000 / image.height), height=2000)
    edited = True
  elif image.width > 4000:
    image.resize(width=4000, height=(image.height * 4000 / image.width))
    edited = True

  if crop_tuple != NO_CROP_TUPLE:
    image.crop(*crop_tuple)
    edited = True

  if edited:
    image_contents = image.execute_transforms(output_encoding=images.JPEG, quality=80)

  # upload with default ACLs set on the bucket  # or use options={'x-goog-acl': 'public-read'})
  gcs_file = gcs.open(image_gcs_path, 'w', content_type='image/jpeg')
  gcs_file.write(image_contents)
  gcs_file.close()

  # thumb
  thumb_gcs_path = CLOUD_STORAGE_BASE_PATH + '/thumbs/' + filename
  thumb = images.Image(image_result.content)
  thumb.resize(width=(thumb.width * THUMB_HEIGHT / thumb.height), height=THUMB_HEIGHT)

  if crop_tuple != NO_CROP_TUPLE:
    thumb.crop(*crop_tuple)
    edited = True

  thumb_contents = thumb.execute_transforms(output_encoding=images.JPEG, quality=40)
  gcs_file = gcs.open(thumb_gcs_path, 'w', content_type='image/jpeg')
  gcs_file.write(thumb_contents)
  gcs_file.close()

  return (CLOUD_STORAGE_ROOT_URL + image_gcs_path,
          CLOUD_STORAGE_ROOT_URL + thumb_gcs_path)
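
A brief caller sketch for the function above; the image URL and title are placeholders, and NO_CROP_TUPLE is the module-level constant already referenced in the code:

# Illustrative only: placeholder arguments for maybe_process_image.
full_url, thumb_url = maybe_process_image(
    'http://example.com/artwork.jpg', NO_CROP_TUPLE, 'Some Featured Artwork')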
  def testRemoveGarbage(self):
    """Make sure abandoned files get removed."""
    writer_spec = {self.WRITER_CLS.BUCKET_NAME_PARAM: "unused",
                   self.WRITER_CLS.TMP_BUCKET_NAME_PARAM: "test"}
    mapreduce_state = self.create_mapreduce_state(output_params=writer_spec)
    shard_state = self.create_shard_state(1)
    ctx = context.Context(mapreduce_state.mapreduce_spec, shard_state)
    context.Context._set(ctx)

    writer = self.WRITER_CLS.create(mapreduce_state.mapreduce_spec,
                                    shard_state.shard_number, 0)
    writer.begin_slice(None)

    # our shard
    our_file = "/test/gae_mr_tmp/DummyMapReduceJobId-tmp-1-very-random"
    f = cloudstorage.open(our_file, "w")
    f.write("foo?")
    f.close()

    # not our shard
    their_file = "/test/gae_mr_tmp/DummyMapReduceJobId-tmp-3-very-random"
    f = cloudstorage.open(their_file, "w")
    f.write("bar?")
    f.close()

    # unrelated file
    real_file = "/test/this_things_should_survive"
    f = cloudstorage.open(real_file, "w")
    f.write("yes, foobar!")
    f.close()

    # Make sure bogus file still exists
    names = [l.filename for l in cloudstorage.listbucket("/test")]
    self.assertTrue(our_file in names)
    self.assertTrue(their_file in names)
    self.assertTrue(real_file in names)

    # slice end should clean up the garbage
    writer = self._serialize_and_deserialize(writer)

    names = [l.filename for l in cloudstorage.listbucket("/test")]
    self.assertFalse(our_file in names)
    self.assertTrue(their_file in names)
    self.assertTrue(real_file in names)

    # finalize shouldn't change anything
    writer.finalize(ctx, shard_state)
    self.assertFalse(our_file in names)
    self.assertTrue(their_file in names)
    self.assertTrue(real_file in names)
  def testSeekAndTell(self):
    f = cloudstorage.open(TESTFILE, 'w')
    f.write('abcdefghij')
    f.close()

    f = cloudstorage.open(TESTFILE)
    f.seek(5)
    self.assertEqual(5, f.tell())
    self.assertEqual('f', f.read(1))
    self.assertEqual(6, f.tell())
    f.seek(-1, os.SEEK_CUR)
    self.assertEqual('f', f.read(1))
    f.seek(-1, os.SEEK_END)
    self.assertEqual('j', f.read(1))
Example #23
def addtoDB(index, url):
    import json
    db_file = gcs.open(BUCKET_PATH + 'db.json', 'r')
    data = db_file.read()
    db_file.close()
    db = json.loads(data)
    if index not in db:
        db[index] = [url]
    else:
        db[index].append(url)
    json_str = json.dumps(db)
    db_file = gcs.open(BUCKET_PATH + 'db.json', 'w')
    db_file.write(json_str)
    db_file.close()
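
A hedged read-side sketch matching the db.json layout maintained by addtoDB above (a dict of index to list of URLs); getFromDB is a hypothetical name and BUCKET_PATH is the same constant used in the example:

import json
import cloudstorage as gcs

def getFromDB(index):
    # Hypothetical lookup for the structure written by addtoDB.
    db_file = gcs.open(BUCKET_PATH + 'db.json', 'r')
    db = json.loads(db_file.read())
    db_file.close()
    return db.get(index, [])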
  def testPartialRecords(self):
    """Test merging into partial key values."""
    try:
      self._prev_max_values_count = shuffler._MergePipeline._MAX_VALUES_COUNT
      # force max values count to extremely low value.
      shuffler._MergePipeline._MAX_VALUES_COUNT = 1

      input_data = [("1", "a"), ("2", "b"), ("3", "c")]
      input_data.sort()

      bucket_name = "testbucket"
      test_filename = "testfile"
      full_filename = "/%s/%s" % (bucket_name, test_filename)

      with cloudstorage.open(full_filename, mode="w") as f:
        with records.RecordsWriter(f) as w:
          for (k, v) in input_data:
            proto = file_service_pb.KeyValue()
            proto.set_key(k)
            proto.set_value(v)
            w.write(proto.Encode())

      p = TestMergePipeline(bucket_name,
                            [full_filename, full_filename, full_filename])
      p.start()
      test_support.execute_until_empty(self.taskqueue)
      p = TestMergePipeline.from_id(p.pipeline_id)

      output_file = p.outputs.default.value[0]
      output_data = []
      with cloudstorage.open(output_file) as f:
        for record in records.RecordsReader(f):
          output_data.append(record)

      expected_data = [
          ("1", ["a"], True),
          ("1", ["a"], True),
          ("1", ["a"], False),
          ("2", ["b"], True),
          ("2", ["b"], True),
          ("2", ["b"], False),
          ("3", ["c"], True),
          ("3", ["c"], True),
          ("3", ["c"], False),
          ]
      self.assertEquals([str(e) for e in expected_data], output_data)
    finally:
      shuffler._MergePipeline._MAX_VALUES_COUNT = self._prev_max_values_count
    self.assertEquals(1, len(self.emails))
def XlsxToTsv(FilePath):
	reload(sys)
	sys.setdefaultencoding('utf-8')
	wb = xlrd.open_workbook(file_contents = gcs.open(FilePath).read())
	sh = wb.sheet_by_index(0)

	FilePathTsv= FilePath.split(".xlsx")[0]
	csvFile = gcs.open(FilePathTsv+'.tsv', 'w')
	wr = csv.writer(csvFile,delimiter='\t')

	for rownum in xrange(sh.nrows):
        	wr.writerow(sh.row_values(rownum))
	csvFile.close()
	gcs.delete(FilePath)
	return FilePathTsv+".tsv"
  def testFlush(self):
    blocksize = 0
    with cloudstorage.open(TESTFILE, 'w') as f:
      blocksize = f._blocksize
      f.write('a'*(blocksize-2))
      f.write('a'*3)
      f.write('a')
      f.flush()
      self.assertEqual(2, f._buffered)
      f.flush()
      f.write('a')
      f.close()

    with cloudstorage.open(TESTFILE) as f:
      self.assertEqual(blocksize + 3, len(f.read()))
  def setUpMultipleFile(self):
    f = cloudstorage.open(self.seg_prefix + "0", "w",
                          options={self.writer_cls._VALID_LENGTH: "5"})
    f.write("12345garbage")
    f.close()

    f = cloudstorage.open(self.seg_prefix + "1", "w",
                          options={self.writer_cls._VALID_LENGTH: "5"})
    f.write("67890garbage")
    f.close()

    f = cloudstorage.open(self.seg_prefix + "2", "w",
                          options={self.writer_cls._VALID_LENGTH: "6"})
    f.write("123456garbage")
    f.close()
Example #28
    def _shrink_note(self, note):
        for file_key in note.files:
            file = file_key.get()
            try:
                with cloudstorage.open(file.full_path) as f:
                    image = images.Image(f.read())
                    image.resize(640)
                    new_image_data = image.execute_transforms()

                content_t = images_formats.get(str(image.format))
                with cloudstorage.open(file.full_path, 'w',
                                       content_type=content_t) as f:
                    f.write(new_image_data)

            except images.NotImageError:
                pass
Example #29
    def _create_note(self, user, title, content, attachments):

        note = Note(parent=ndb.Key("User", user.nickname()),
                    title=title,
                    content=content)
        note.put()

        if attachments:
            bucket_name = app_identity.get_default_gcs_bucket_name()
            for file_name, file_content in attachments:
                content_t = mimetypes.guess_type(file_name)[0]
                real_path = os.path.join('/', bucket_name, user.user_id(), file_name)

                with cloudstorage.open(real_path, 'w', content_type=content_t,
                                       options={'x-goog-acl': 'public-read'}) as f:
                    f.write(file_content.decode())

                key = blobstore.create_gs_key('/gs' + real_path)
                try:
                    url = images.get_serving_url(key, size=0)
                    thumbnail_url = images.get_serving_url(key, size=150, crop=True)
                except (images.TransformationError, images.NotImageError):
                    url = "http://storage.googleapis.com{}".format(real_path)
                    thumbnail_url = None

                f = NoteFile(parent=note.key, name=file_name,
                             url=url, thumbnail_url=thumbnail_url,
                             full_path=real_path)
                f.put()
                note.files.append(f.key)

            note.put()
Example #30
def get_image(image_key):
    """Serve the content image with the given key."""
    import cloudstorage as gcs

    if not image_key:
        logging.error("No image key provided")
        return

    image = memcache.get(image_key)

    if image:
        return Response(image, mimetype='image/jpeg')

    image = gcs.open('/content_images/%s' % image_key)
    data = image.read()
    response = Response(data, mimetype='image/jpeg')
    image.close()

    # TODO: Memcache cannot handle images greater than 1MB
    memcache.set(image_key, data)

    return response
def set_last_end_time(bucket_name, end_time_str):
    """ Write the end_time as a string value in a JSON object in GCS. 
        This file is used to remember the last end_time in case one isn't provided
    """
    # get the datetime object
    end_time = datetime.strptime(end_time_str, '%Y-%m-%dT%H:%M:%S.%fZ')
    delta = timedelta(seconds=1)
    # Add 1 second & convert back to str
    end_time_calc = end_time + delta
    end_time_calc_str = end_time_calc.strftime('%Y-%m-%dT%H:%M:%S.%fZ')

    logging.debug("end_time_str: {}, end_time_Calc_str: {}".format(
        end_time_str, end_time_calc_str))
    end_time_str_json = {"end_time": end_time_calc_str}
    write_retry_params = gcs.RetryParams(backoff_factor=1.1)
    gcs_file = gcs.open('/{}/{}'.format(bucket_name,
                                        config.LAST_END_TIME_FILENAME),
                        'w',
                        content_type='text/plain',
                        retry_params=write_retry_params)
    gcs_file.write(json.dumps(end_time_str_json))
    gcs_file.close()
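
A possible companion sketch for reading the value back when no end_time is supplied; get_last_end_time is a hypothetical name, and config.LAST_END_TIME_FILENAME is the same constant the writer uses:

import json
import logging

import cloudstorage as gcs

def get_last_end_time(bucket_name):
    # Hypothetical reader for the JSON object written by set_last_end_time;
    # `config` is assumed to be the surrounding module's settings import.
    path = '/{}/{}'.format(bucket_name, config.LAST_END_TIME_FILENAME)
    try:
        gcs_file = gcs.open(path)
    except gcs.NotFoundError:
        logging.debug("No stored end_time found at %s", path)
        return None
    end_time_json = json.loads(gcs_file.read())
    gcs_file.close()
    return end_time_json["end_time"]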
Example #32
def GetTracePaths(bucket):
    """Returns a list of trace files in a bucket.

  Finds and loads the trace databases, and returns their content as a list of
  paths.

  This function assumes a specific structure for the files in the bucket. These
  assumptions must match the behavior of the backend:
  - The trace databases are located in the bucket.
  - The trace databases files are the only objects with the
    TRACE_DATABASE_PREFIX prefix in their name.

  Returns:
    list: The list of paths to traces, as strings.
  """
    traces = []
    prefix = os.path.join('/', bucket,
                          common.clovis_paths.TRACE_DATABASE_PREFIX)
    file_stats = cloudstorage.listbucket(prefix)

    for file_stat in file_stats:
        database_file = file_stat.filename
        clovis_logger.info('Loading trace database: ' + database_file)

        with cloudstorage.open(database_file) as remote_file:
            json_string = remote_file.read()
        if not json_string:
            clovis_logger.warning('Failed to download: ' + database_file)
            continue

        database = LoadingTraceDatabase.FromJsonString(json_string)
        if not database:
            clovis_logger.warning('Failed to parse: ' + database_file)
            continue

        for path in database.ToJsonDict():
            traces.append(path)

    return traces
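
A minimal caller sketch for GetTracePaths, assuming the same clovis_logger is available; it only enumerates and logs the trace paths:

def log_trace_paths(bucket):
    # Illustrative only: list the traces referenced by the bucket's trace databases.
    trace_paths = GetTracePaths(bucket)
    clovis_logger.info('Found %d traces in bucket %s.' % (len(trace_paths), bucket))
    for path in trace_paths:
        clovis_logger.info(path)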
Example #33
def store_feed_for_band_key(the_band_key, the_feed):

    bucket_name = os.environ.get('BUCKET_NAME',
                                 app_identity.get_default_gcs_bucket_name())

    # self.response.headers['Content-Type'] = 'text/plain'
    # self.response.write('Demo GCS Application running from Version: '
    #                   + os.environ['CURRENT_VERSION_ID'] + '\n')
    # self.response.write('Using bucket name: ' + bucket_name + '\n\n')

    filename = "/{0}/{1}{2}".format(bucket_name, "rss-",
                                    the_band_key.urlsafe())
    # self.response.write('Creating file %s\n' % filename)

    write_retry_params = gcs.RetryParams(backoff_factor=1.1)
    gcs_file = gcs.open(filename,
                        'w',
                        content_type='text/plain',
                        retry_params=write_retry_params)
    gcs_file.write(the_feed.encode('UTF-8'))
    gcs_file.close()
    return the_feed
    def _write_file(self, bucketFile, mtype=None, content="", raw=False, private=False, extrameta=None):
        log.info("Attempting to write: %s %s %s" % (bucketFile, mtype, raw))
        # The retry_params specified in the open call will override the default
        # retry params for this particular file handle.

        setAppVar(CLOUDSTAT,None)
        moremeta = getAppVar(CLOUDEXTRAMETA)
        setAppVar(CLOUDEXTRAMETA,None) #clear out now potentially stale values
        
        if extrameta and moremeta:
            extrameta.update(moremeta)
        else:
            extrameta = moremeta

        try:
            write_retry_params = cloudstorage.RetryParams(backoff_factor=1.1)
            write_options = {}
            if not private:
                write_options.update({'x-goog-acl': 'public-read'})
            if extrameta:
                write_options.update(extrameta)
            if private:
                write_options = {}
            if not raw:
                log.info("Encoding to utf8")
                content = content.encode('utf-8')
            with cloudstorage.open(
                    bucketFile, 'w',
                    content_type=mtype,
                    options=write_options,
                    retry_params=write_retry_params) as cloudstorage_file:
                        cloudstorage_file.write(content)
        except Exception as e:
            log.error("File write error: (%s): %s" % (bucketFile,e))
            log.error(traceback.format_exc())
            return False
        
        setAppVar(CLOUDSTAT,self._stat_file(bucketFile,cache=False))
        return True
Example #35
def get_calfeed_for_key(prefix, the_key):
    bucket_name = os.environ.get('BUCKET_NAME',
                           app_identity.get_default_gcs_bucket_name())

    filename = "/{0}/{1}{2}{3}".format(bucket_name,prefix,"cal-",the_key.urlsafe())

    gcs_file = None

    try:
        gcs_file = gcs.open(filename)
    except gcs.NotFoundError:
        gcs_file = None

    if gcs_file:
        feed = gcs_file.read()
        gcs_file.close()
        logging.info("found file")
    else:
        feed = None
        logging.info("did not find file")

    return feed
Example #36
def save_to_gcs(file_obj):
    serving_url = ''  # just assign it and reassign later

    time_stamp = int(time.time())
    app_id = app_identity.get_application_id()

    fname = '/%s.appspot.com/post_%s.jpg' % (app_id, time_stamp)
    logging.error(fname)

    # Content Types
    # audio/mpeg
    # image/jpeg

    gcs_file = gcs.open(fname, 'w', content_type="image/jpeg")
    gcs_file.write(file_obj)
    gcs_file.close()

    gcs_filename = "/gs%s" % fname
    serving_url = images.get_serving_url(blobstore.create_gs_key(gcs_filename))
    media_obj = save_gcs_to_media(gcs_filename, serving_url)

    return media_obj
Example #37
  def post(self):
    datastore_hooks.SetPrivilegedRequest()

    try:
      params = json.loads(self.request.body)
      gcs_file_path = params['gcs_file_path']

      try:
        gcs_file = cloudstorage.open(
            gcs_file_path, 'r', retry_params=_RETRY_PARAMS)
        with DecompressFileWrapper(gcs_file) as decompressing_file:
          histogram_dicts = _LoadHistogramList(decompressing_file)

        gcs_file.close()

        ProcessHistogramSet(histogram_dicts)
      finally:
        cloudstorage.delete(gcs_file_path, retry_params=_RETRY_PARAMS)

    except Exception as e: # pylint: disable=broad-except
      logging.error('Error processing histograms: %r', e.message)
      self.response.out.write(json.dumps({'error': e.message}))
    def post(self):
        self.response.headers.add_header('Access-Control-Allow-Origin', '*')
        self.response.headers['Content-Type'] = 'application/json'

        bucket_name = app_identity.get_default_gcs_bucket_name()
        uploaded_file = self.request.POST.get('uploaded_file')
        file_name = getattr(uploaded_file, 'filename', None)
        file_content = getattr(uploaded_file, 'file', None)
        real_path = ''

        if file_name and file_content:
            content_t = mimetypes.guess_type(file_name)[0]
            real_path = os.path.join('/', bucket_name, file_name)

            with cloudstorage.open(real_path,
                                   'w',
                                   content_type=content_t,
                                   options={'x-goog-acl': 'public-read'}) as f:
                f.write(file_content.read())

            key = self._get_urls_for(file_name)
            self.response.write(key)
Example #39
 def get(self):
     days = self.request.get('days', '1')
     participantID = self.request.get('participantID', '000')
     today = datetime.today()-timedelta(hours=timezone)
     userIndex = getUserID(participantID.encode('utf-8'))
     #self.response.write('userIndex: ' + str(userIndex))
     data = {}
     if (userIndex == 'error'):
         self.response.set_status(404)
         self.response.out.write('wrong participantID')
         return
     else:
         bucket_name = os.environ.get(
             'BUCKET_NAME', app_identity.get_default_gcs_bucket_name())
         bucket = '/' + bucket_name + '/Users' + '/' + userIndex + '/Dailyrecords'
         for x in range(int(days)):
             tempday = str((today - timedelta(x)).strftime('%Y%m%d'))
             tempdayfile = tempday + '/'
             # self.response.out.write(tempday)
             if self.FileExists(bucket, tempdayfile):
                 filename = bucket + '/' + tempdayfile + 'weight.json'
                 with cloudstorage.open(filename) as gcs_file_read:
                     contents = gcs_file_read.read()
                     contents_json = json.loads(contents)
                     # self.response.write(type(contents_json["body"]["measuregrps"]).__name__)
                     # self.response.write('\n\n##\n\n')
                     # self.response.write(contents_json["body"]["measuregrps"])
                     for value in contents_json["body"]["measuregrps"]:
                         #self.response.write(type(value).__name__) #dict
                         #self.response.write(type(value["measures"]).__name__) #list
                         #self.response.write(len(value["measures"]))#1
                         try:
                             data[str(value["grpid"])]=value["measures"][0]["value"]
                         except KeyError:
                             self.response.write('error')
                         #data[value["grpid"]] = value["measures"]["value"]
                         # for key, value in contents_json["body"]["measuregrps"].iteritems():
                         #     data[value["grpid"]]=value["measures"]["value"]
         self.response.out.write(data)
Example #40
    def get(self):
        # Get the default Cloud Storage Bucket name and create a file name for
        # the object in Cloud Storage.
        bucket = app_identity.get_default_gcs_bucket_name()

        # Cloud Storage file names are in the format /bucket/object.
        filename = '/{}/blobstore_serving_demo'.format(bucket)

        # Create a file in Google Cloud Storage and write something to it.
        with cloudstorage.open(filename, 'w') as filehandle:
            filehandle.write('abcde\n')

        # In order to read the contents of the file using the Blobstore API,
        # you must create a blob_key from the Cloud Storage file name.
        # Blobstore expects the filename to be in the format of:
        # /gs/bucket/object
        blobstore_filename = '/gs{}'.format(filename)
        blob_key = blobstore.create_gs_key(blobstore_filename)

        # BlobstoreDownloadHandler serves the file from Google Cloud Storage to
        # your computer using blob_key.
        self.send_blob(blob_key)
def start_prune_job(
        job_id):  # , email, tree_base, tree_set, sample_size, names
    with gcs.open(JOB_CONFIG_PATH % job_id) as fh:
        job_info = yaml.load(fh)

    # init_job(job_id, email, tree_base, tree_set, sample_size, names)

    # Get the base tree from config
    tree_base = job_info['base_tree'].lower()

    # Get the tree_set from config
    tree_set = job_info['tree_set']
    for k, v in TREE_CODES.iteritems():
        if v == tree_set:
            tree_set = k

    names = [n.replace(' ', '_') for n in job_info['names']]
    sample_trees = job_info['sample_trees']

    start_pruning(job_id, tree_base, sample_trees, names)
    # process_bad_names(job_id)
    finalise_job(job_id, sample_trees, tree_base, tree_set)
    def create_test_content(self, bucket_name, object_prefix, num_files):
        """Create a file in Google Cloud Storage with a small amount of content.

    Args:
      bucket_name: the name of the bucket, with no delimiters.
      object_prefix: a string prefix for each object/file that will be created.
        A suffix with a file number will automatically be appended.
      num_files: the number of files to create.

    Returns:
      A list with each element containing the data in one of the created files.
    """
        created_content = []
        for file_num in range(num_files):
            content = "Dummy Content %d" % file_num
            created_content.append(content)
            test_file = cloudstorage.open(
                "/%s/%s%03d" % (bucket_name, object_prefix, file_num),
                mode="w")
            test_file.write(content)
            test_file.close()
        return created_content
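
A small follow-up check that could sit next to the helper above, reading each object back through the test case's assertions; assert_test_content is a hypothetical name:

import cloudstorage

def assert_test_content(test_case, bucket_name, object_prefix, created_content):
    # Read every object written by create_test_content back and verify it round-trips.
    for file_num, expected in enumerate(created_content):
        filename = "/%s/%s%03d" % (bucket_name, object_prefix, file_num)
        with cloudstorage.open(filename) as test_file:
            test_case.assertEqual(expected, test_file.read())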
  def create_file(self, filename):
    """Create a file.

    The retry_params specified in the open call will override the default
    retry params for this particular file handle.

    Args:
      filename: filename.
    """
    self.response.write('Creating file %s\n' % filename)

    write_retry_params = gcs.RetryParams(backoff_factor=1.1)
    gcs_file = gcs.open(filename,
                        'w',
                        content_type='text/plain',
                        options={'x-goog-meta-foo': 'foo',
                                 'x-goog-meta-bar': 'bar'},
                        retry_params=write_retry_params)
    gcs_file.write('abcde\n')
    gcs_file.write('f'*1024*4 + '\n')
    gcs_file.close()
    self.tmp_filenames_to_clean_up.append(filename)
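
A possible companion method for the demo handler above, assuming GCSFileStat exposes the custom x-goog-meta-* headers through its metadata field; read_file_and_metadata is a hypothetical name:

  def read_file_and_metadata(self, filename):
    # Read back the content written by create_file and report its custom metadata.
    with gcs.open(filename) as gcs_file:
      content = gcs_file.read()
    stat = gcs.stat(filename)
    self.response.write('Read %d bytes, metadata: %r\n' % (len(content), stat.metadata))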
Example #44
  def post(self):
    trace_uuid = str(uuid.uuid4())
    bucket_name = ('/performance-insights/' + trace_uuid)
    gcs_file = gcs.open(bucket_name,
                        'w',
                        content_type='application/octet-stream',
                        options={},
                        retry_params=default_retry_params)
    gcs_file.write(self.request.get('trace'))
    gcs_file.close()

    trace_object = trace_info.TraceInfo(id=trace_uuid)
    trace_object.prod = self.request.get('prod')
    trace_object.remote_addr = os.environ["REMOTE_ADDR"]
    tags_string = self.request.get('tags')
    if re.match('^[a-zA-Z0-9,]+$', tags_string): # ignore non alpha-numeric tags
      trace_object.tags = tags_string.split(',')
    trace_object.user_agent = self.request.headers.get('User-Agent')
    trace_object.ver = self.request.get('product_version')
    trace_object.put()

    self.response.write(trace_uuid)
Example #45
    def get(self):
        days = self.request.get('days', '1')
        participantID = self.request.get('participantID', '000')
        today = datetime.today()-timedelta(hours=timezone)
        userIndex = getUserID(participantID.encode('utf-8'))
        totalstepcount=0
        #self.response.out.write('totalstepcount\n')
        if (userIndex == 'error'):
            self.response.set_status(404)
            self.response.out.write('wrong participantID')
            return
        else:
            #self.response.write(userIndex)
            bucket_name = os.environ.get(
                'BUCKET_NAME', app_identity.get_default_gcs_bucket_name())
            bucket = '/' + bucket_name + '/Users' + '/' + userIndex + '/Dailyrecords'
            for x in range (int(days)):
                tempday = str((today-timedelta(x)).strftime('%Y%m%d'))+'/'
                #self.response.out.write(tempday)
                if self.FileExists(bucket,tempday):
                    filename = bucket + '/' + tempday + 'activity.json'
                    with cloudstorage.open(filename) as gcs_file_read:
                        contents = gcs_file_read.read()
                        contents_json = json.loads(contents)

                        #self.response.out.write ('type:' + str(type(contents_json["body"]["series"]).__name__))
                        if len(contents_json["body"]["series"])!= 0:
                            #self.response.write(len(contents_json["body"]["series"]))
                            for key, value in contents_json["body"]["series"].iteritems():
                                try:
                                    totalstepcount+=int(value["steps"])
                                except KeyError:
                                    self.response.out.write('')
                        gcs_file_read.close()

                # else:  #this is when "days" is larger than the actual number of days in the database
                #     self.response.out.write('')
            self.response.out.write(totalstepcount)
def migrate_single_blob_inline(blob_info, bucket_name):
    """Migrates a single, small blob.

  Args:
    blob_info: The BlobInfo for the blob to copy.
    bucket_name: The name of the bucket to copy the blob info.

  Returns:
    The resulting filename for the GCS file, rooted by "/[bucket_name]/..."
  """
    options = {}
    if blob_info.filename:
        options['content-disposition'] = (build_content_disposition(
            blob_info.filename.encode('utf8')))

    gcs_filename = build_gcs_filename(blob_info,
                                      filename=blob_info.filename,
                                      bucket_name=bucket_name,
                                      include_bucket=True,
                                      include_leading_slash=True)

    blob_reader = blobstore.BlobReader(blob_info, buffer_size=BLOB_BUFFER_SIZE)

    gcs_file = cloudstorage.open(gcs_filename.encode('utf8'),
                                 mode='w',
                                 content_type=blob_info.content_type,
                                 options=options)

    try:
        chunk = blob_reader.read(BLOB_BUFFER_SIZE)
        while chunk:
            gcs_file.write(chunk)
            chunk = blob_reader.read(BLOB_BUFFER_SIZE)
    finally:
        gcs_file.close()

    store_mapping_entity(blob_info, gcs_filename)
    return gcs_filename
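
A hedged driver sketch for the function above; it walks Blobstore with BlobInfo.all() and migrates each blob inline (migrate_all_blobs_inline is a hypothetical name, and it assumes every blob is small enough to copy this way):

from google.appengine.ext import blobstore

def migrate_all_blobs_inline(bucket_name):
    # Illustrative only: copy every Blobstore blob into the given GCS bucket.
    gcs_filenames = []
    for blob_info in blobstore.BlobInfo.all():
        gcs_filenames.append(migrate_single_blob_inline(blob_info, bucket_name))
    return gcs_filenames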
Example #47
    def post(self):

        if self.session.get('userid') == None:
            self.redirect('/')

        message = ""
        fileitem = self.request.POST["filedata"]
        if fileitem.filename:
            fn = os.path.basename(fileitem.filename)

            filedata = fileitem.file.read()

            cloudfile = str(uuid.uuid4())
            write_retry_params = gcs.RetryParams(backoff_factor=1.1)
            gcs_file = gcs.open('/' + bucket_name + '/' + cloudfile,
                                'w',
                                content_type='text/plain',
                                options={
                                    'x-goog-meta-foo': 'foo',
                                    'x-goog-meta-bar': 'bar'
                                },
                                retry_params=write_retry_params)

            gcs_file.write(filedata)
            gcs_file.close()

            filelist = FileList(userid=str(self.session['userid']),
                                name=fn,
                                size=len(filedata),
                                loadtime=datetime.datetime.now(),
                                c_position=cloudfile,
                                deleted=False)
            filelist.put()
            self.redirect('/dashboard')
        else:
            message = 'Dosya Yükleme Hatası'

        self.response.out.write(message)
Example #48
def read_file(bucket, filename, chunk_size=CHUNK_SIZE):
    """Reads a file and yields its content in chunks of a given size.

  Arguments:
    bucket: a bucket that contains the file.
    filename: name of the file to read.
    chunk_size: maximum size of a chunk to read and yield.

  Yields:
    Chunks of a file (as str objects).
  """
    path = '/%s/%s' % (bucket, filename)
    bytes_read = 0
    data = None
    file_ref = None
    try:
        with cloudstorage.open(path,
                               read_buffer_size=chunk_size,
                               retry_params=_make_retry_params()) as file_ref:
            while True:
                data = file_ref.read(chunk_size)
                if not data:
                    break
                bytes_read += len(data)
                yield data
                # Remove reference to a buffer so it can be GC'ed.
                data = None
    except Exception as exc:
        logging.warning('Exception while reading \'%s\', read %d bytes: %s %s',
                        path, bytes_read, exc.__class__.__name__, exc)
        raise
    finally:
        # Remove lingering references to |data| and |file_ref| so they get GC'ed
        # sooner. Otherwise this function's frame object keeps references to them,
        # A frame object is around as long as there are references to this
        # generator instance somewhere.
        data = None
        file_ref = None
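
A minimal usage sketch for the generator above; copy_file is a hypothetical helper that streams one GCS object into another chunk by chunk:

import cloudstorage

def copy_file(bucket, src_filename, dest_path):
    # Stream the source object into dest_path using the chunked read_file generator.
    with cloudstorage.open(dest_path, 'w') as dest:
        for chunk in read_file(bucket, src_filename):
            dest.write(chunk)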
Example #49
    def create(cls, mapreduce_state, shard_state):
        """Create new writer for a shard.

    Args:
      mapreduce_state: an instance of model.MapreduceState describing current
        job. State can NOT be modified.
      shard_state: an instance of model.ShardState.

    Returns:
      an output writer for the requested shard.
    """
        # Get the current job state
        job_spec = mapreduce_state.mapreduce_spec
        writer_spec = _get_params(job_spec.mapper, allow_old=False)

        # Determine parameters
        key = cls._generate_filename(writer_spec, job_spec.name,
                                     job_spec.mapreduce_id,
                                     shard_state.shard_number,
                                     shard_state.retries)
        # GoogleCloudStorage format for filenames, Initial slash is required
        filename = "/%s/%s" % (writer_spec[cls.BUCKET_NAME_PARAM], key)

        content_type = writer_spec.get(cls.CONTENT_TYPE_PARAM, None)

        options = {}
        if cls.ACL_PARAM in writer_spec:
            options["x-goog-acl"] = writer_spec.get(cls.ACL_PARAM)

        account_id = writer_spec.get(cls._ACCOUNT_ID_PARAM, None)

        writer = cloudstorage.open(filename,
                                   mode="w",
                                   content_type=content_type,
                                   options=options,
                                   _account_id=account_id)

        return cls(writer, filename, writer_spec=writer_spec)
    def _get_file(self, bucketFile, reqtype=None, cache=True):
        log.info("_get_file(%s,%s)" % (bucketFile,cache))

        setAppVar(CLOUDSTAT,None)
        
        stat = None
        content = None
        cached = False
        if cache:
            item = self.readCache(bucketFile,reqtype)
            if item:
                content = item.content
                stat = item.stat
                if content:
                    cached = True
                    log.info("Got from readCache")
                    
        if not stat:
            stat = self._stat_file(bucketFile,cache=False)
            if stat:
                log.info('Opening file {}'.format(bucketFile))
                try:
                    with cloudstorage.open(bucketFile) as cloudstorage_file:
                        content = cloudstorage_file.read()
                        
                except cloudstorage.NotFoundError:
                    log.info("File not found: %s" % bucketFile)
                except Exception as e:
                    log.info("File read error (%s): %s" % (bucketFile,e))
                    
        if not cached and content:
            log.info("Adding to cache: %s" % bucketFile)
            val = bucketCacheItem(stat=stat,content=content)
            self.writeCache(bucketFile,val, reqtype) 

        setAppVar(CLOUDSTAT,stat)
        return stat, content
Example #51
  def post(self):
    datastore_hooks.SetPrivilegedRequest()

    try:
      params = json.loads(self.request.body)
      gcs_file_path = params['gcs_file_path']

      try:
        gcs_file = cloudstorage.open(
            gcs_file_path, 'r', retry_params=_RETRY_PARAMS)
        contents = gcs_file.read()
        data_str = zlib.decompress(contents)
        gcs_file.close()
      finally:
        cloudstorage.delete(gcs_file_path, retry_params=_RETRY_PARAMS)

      with timing.WallTimeLogger('json.loads'):
        histogram_dicts = json.loads(data_str)

      ProcessHistogramSet(histogram_dicts)
    except Exception as e: # pylint: disable=broad-except
      logging.error('Error processing histograms: %r', e.message)
      self.response.out.write(json.dumps({'error': e.message}))
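For context, the payload this handler expects is a zlib-compressed JSON array that some producer wrote to GCS beforehand. A sketch of that producer side, assuming the same cloudstorage module and a caller-supplied path (retry params omitted):

import json
import zlib
import cloudstorage

def write_histogram_payload(gcs_file_path, histogram_dicts):
    # Compress the JSON exactly the way the handler above decompresses it.
    payload = zlib.compress(json.dumps(histogram_dicts))
    with cloudstorage.open(gcs_file_path, 'w',
                           content_type='application/octet-stream') as gcs_file:
        gcs_file.write(payload)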
Exemple #52
    def get(self):
        app_id = self.request.get("app_id", None)
        azzert(app_id is not None, "app_id is not found")
        logging.debug("GetOSALaucherAppHandler app_id: %s", app_id)

        app = OSALauncherApp.get_by_app_id(app_id)
        if app:
            filename = "%s-%s.apk" % (app.app_id, app.version_code)
            try:
                gae_filename = '%s/oca/launcher/apps/%s.apk' % (
                    ROGERTHAT_ATTACHMENTS_BUCKET, app_id)
                self.response.headers[
                    'Content-Type'] = "application/vnd.android.package-archive"
                self.response.headers['Content-Disposition'] = str(
                    'attachment; filename=%s' % filename)
                with cloudstorage.open(gae_filename, 'r') as gcs_file:
                    self.response.write(gcs_file.read())

            except cloudstorage.errors.NotFoundError:
                logging.warn("GetOSALaucherAppHandler NOT found in gcs")
                self.error(500)
        else:
            self.error(500)
Exemple #53
    def post(self):
        # Use a context manager so the template file is closed after reading.
        with open('header.html') as header_file:
            self.response.write(header_file.read())
        bucket_name = app_identity.get_default_gcs_bucket_name()
        self.response.write("<h4>"+bucket_name+"</h4>")
        
        name = self.request.POST.get('tfName')
        branch = self.request.POST.get('ddBranch')
        self.response.write(name)
        self.response.write(branch)
        
        data = self.request.POST.getall('file1')
        self.response.write(data)
        self.response.write("<hr/>")

        for upload in data:
            # Avoid shadowing the builtin "file"; write each upload to GCS.
            f1 = gcs.open("/" + bucket_name + "/" + upload.filename,
                          "w",
                          content_type=upload.type)
            f1.write(upload.file.read())
            f1.close()
            self.response.write("<h4>https://storage.googleapis.com/" + bucket_name + "/" + upload.filename + "</h4>")
        self.response.write("Files uploaded successfully")
Exemple #54
    def serve(self, d):
        import cloudstorage as gcs
        rkey = self.request.get('rkey')
        r = Report.GetAccessible(rkey, self.user, urlencoded_key=True)
        if r:
            if r.is_done() and r.gcs_files:
                gcsfn = r.gcs_files[0]
                if tools.on_dev_server():
                    try:
                        gcs_file = gcs.open(gcsfn, 'r')
                    except gcs.NotFoundError, e:
                        self.response.out.write("File not found")
                    else:
                        self.response.headers['Content-Type'] = Report.content_type(r.extension)
                        self.response.headers['Content-Disposition'] = str('attachment; filename="%s"' % r.filename())
                        self.response.write(gcs_file.read())
                        gcs_file.close()
                else:
                    signed_url = tools.sign_gcs_url(gcsfn, expires_after_seconds=5)
                    response = self.redirect(signed_url)
                    logging.info(response)
            else:
                self.set_response(success=False, status=404, message="Report not ready")  # Not found
Exemple #55
def get_last_end_time(project_id, bucket_name):
    """ Get the end_time as a string value from a JSON object in GCS. 
        This file is used to remember the last end_time in case one isn't provided
    """
    last_end_time_str = ""
    file_name = '{}.{}'.format(project_id, config.LAST_END_TIME_FILENAME)
    logging.debug("get_last_end_time - file_name: {}".format(file_name))
    try:
        gcs_file = gcs.open('/{}/{}'.format(
            bucket_name, file_name))
        contents = gcs_file.read()
        logging.debug("GCS FILE CONTENTS: {}".format(contents))
        json_contents = json.loads(contents) 
        last_end_time_str = json_contents["end_time"]
        gcs_file.close()
    except NotFoundError as nfe:
        logging.error("Missing file when reading {} from GCS: {}".format(file_name, nfe))
        last_end_time_str = None
    except Exception as e:
        logging.error("Received error when reading {} from GCS: {}".format(file_name,e))
        last_end_time_str = None

    return last_end_time_str
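A companion writer for the same marker file, assuming the JSON layout read above ({"end_time": ...}) and the same gcs, json and config modules, might look like this:

def set_last_end_time(project_id, bucket_name, end_time_str):
    """ Writes {"end_time": ...} so the next run can resume from it.
        Hypothetical helper; mirrors get_last_end_time above.
    """
    file_name = '{}.{}'.format(project_id, config.LAST_END_TIME_FILENAME)
    with gcs.open('/{}/{}'.format(bucket_name, file_name), 'w',
                  content_type='application/json') as gcs_file:
        gcs_file.write(json.dumps({"end_time": end_time_str}))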
def playsong(request, song_id, song_seed):
    '''Grabs the current song to get its JSON.
    Gets the seed from the last page's seed value and magic.'''

    song = get_object_or_404(UMusic, user=request.user, id=song_id)
    song_json = json.loads(json.loads(song.song_json))

    inFiles = getwavs(song_json, song_seed)  # get paths from algorithm

    if settings.INPRODUCTION:
        songData = ""
        for inFile in inFiles:
            # Read each clip from GCS in production.
            with gcs.open(inFile) as song_file:
                songData += song_file.read()
    else:
        songData = ""
        for inFile in inFiles:
            # Read each clip from the local filesystem in development.
            with open(inFile, "rb") as song_file:
                songData += song_file.read()

    return HttpResponse(songData, content_type='audio/mpeg')
Exemple #57
    def post(self, user):

        self.response.headers['Content-Type'] = 'text/plain'
        self.response.out.write('All OK!!')

        bucket = '/modelr_live_bucket/'
        filename = bucket + str(user.user_id) + '/' + str(time.time())

        encoded_image = self.request.get('image').split(',')[1]
        pic = base64.b64decode(encoded_image)

        gcsfile = gcs.open(filename, 'w')
        gcsfile.write(pic)

        gcsfile.close()

        # blobstore.create_gs_key expects the GCS object path prefixed with '/gs'.
        bs_file = '/gs' + filename

        blob_key = blobstore.create_gs_key(bs_file)

        ImageModel(parent=user,
                   user=user.user_id,
                   image=blob_key).put()
Exemple #58
    def _open_file(cls, writer_spec, filename_suffix, use_tmp_bucket=False):
        """Opens a new gcs file for writing."""
        if use_tmp_bucket:
            bucket = cls._get_tmp_gcs_bucket(writer_spec)
            account_id = cls._get_tmp_account_id(writer_spec)
        else:
            bucket = cls._get_gcs_bucket(writer_spec)
            account_id = cls._get_account_id(writer_spec)

        # Google Cloud Storage filename format; the initial slash is required.
        filename = "/%s/%s" % (bucket, filename_suffix)

        content_type = writer_spec.get(cls.CONTENT_TYPE_PARAM, None)

        options = {}
        if cls.ACL_PARAM in writer_spec:
            options["x-goog-acl"] = writer_spec.get(cls.ACL_PARAM)

        return cloudstorage.open(filename,
                                 mode="w",
                                 content_type=content_type,
                                 options=options,
                                 _account_id=account_id)
Exemple #59
    def AuthenticatedGet(self, org, event):
        filename = all_event_timeless_filename(event)
        bucket_path = BUCKET_NAME + '/' + filename

        try:
            file_stat = cloudstorage.stat(bucket_path)
        except cloudstorage.NotFoundError:
            self.abort(404)

        # rewrite filename to include timestamp
        custom_timestamp = timestamp(
            datetime.datetime.utcfromtimestamp(file_stat.st_ctime))
        filename_to_serve = file_stat.filename.replace(
            '.csv', '-%s.csv' % custom_timestamp)

        # serve the file as an attachment, forcing download
        if file_stat.content_type:
            self.response.headers['Content-Type'] = file_stat.content_type
        self.response.headers['Content-Disposition'] = (str(
            'attachment; filename="%s"' % filename_to_serve))

        # Use a context manager so the GCS file handle is closed after serving.
        with cloudstorage.open(bucket_path) as gcs_fd:
            self.response.write(gcs_fd.read())