Exemple #1
0
 def test_proto_to_proto(self):
   """Round-trip a proto value through an S3ProtoDictionary with proto keys.

   Asserts that Get returns None for the key before anything is stored,
   then that the value passed to Set is what Get returns afterwards.
   """
   key_proto = CreateTestProto('key_foo')
   value_proto = CreateTestProto('value_foo')
   # The current timestamp is appended so the bucket name differs per run.
   bucket = 's3dict_test_foobar_%s' % time.time()
   store = s3dict.S3ProtoDictionary(
       bucket,
       deluge_pb2.ResourceProvenance(),
       deluge_pb2.ResourceProvenance())

   self.assertEqual(store.Get(key_proto), None)
   store.Set(key_proto, value_proto)
   self.assertEqual(store.Get(key_proto), value_proto)
Exemple #2
0
def GetProvenanceDict():
    """Return the module-level provenance dictionary, creating it on demand.

    The global ``provenance_dict`` is returned as-is when it is truthy.
    Otherwise a new S3ProtoDictionary is built for the
    'deluge_resource_provenance_db_v02' bucket with ResourceProvenance
    values, stored in the global, and returned.
    """
    global provenance_dict
    if provenance_dict:
        return provenance_dict

    provenance_dict = s3dict.S3ProtoDictionary(
        'deluge_resource_provenance_db_v02',
        value_proto=deluge_pb2.ResourceProvenance())
    return provenance_dict
Exemple #3
0
def CreateTestProto(param):
  """Build a ResourceProvenance message populated with fixed test data.

  Args:
    param: value assigned to the fingerprint, flow, name and uri fields.

  Returns:
    A deluge_pb2.ResourceProvenance with those four fields set to `param`,
    start/end times of 1000/1010 and three input fingerprints
    ('fp1', 'fp2', 'fp3').
  """
  proto = deluge_pb2.ResourceProvenance()
  for field_name in ('fingerprint', 'flow', 'name', 'uri'):
    setattr(proto, field_name, param)
  proto.start_time_sec = 1000
  proto.end_time_sec = 1010
  proto.input_fingerprints.extend(['fp1', 'fp2', 'fp3'])
  return proto
Exemple #4
0
    def Execute(self):
        """Run the flow and record provenance metadata for the outputs it owns.

        Intended to be called only by the scheduler.

        Before running, the fingerprint of every input is collected.  For each
        input flagged ``is_generated`` the stored provenance record is looked
        up; when none exists the problem is logged and the operator is asked
        on stdin whether to delete the resource.  Answering 'yes' removes the
        resource and logs at FATAL (presumably aborting -- confirm the
        semantics of LOG(FATAL)).  Any other answer leaves the resource in
        place and execution continues.

        After ``self.Run()`` returns, one ResourceProvenance record holding
        the run's start/end times and the input fingerprints is stored for
        each output whose ``flow`` attribute is this flow.
        """
        # Collect fingerprints of all inputs, checking generated ones for a
        # provenance record along the way.
        input_fingerprints = []
        for _, input_resource in self.inputs.iteritems():
            fingerprint = input_resource.GetFingerprint()
            input_fingerprints.append(fingerprint)
            if not input_resource.is_generated:
                continue
            record = provenance.GetResourceProvenanceRecord(fingerprint)
            if record is not None:
                continue

            LOG(
                INFO, 'failed to find record for fingerprint: %s' %
                (fingerprint))
            LOG(INFO, 'uri: %s' % (input_resource.GetUri()))
            LOG(
                INFO,
                'You are missing provenance record for a generated resource: %s.  You probably terminated a flow... but the MR was not stopped. The resource appears to be there, but has no "finish time" record.  You probably want to delete it so it gets recreated along with proper metadata.'
                % (input_resource))
            # Tell the operator what is expected; without a prompt the
            # process just appears to hang while waiting on stdin.
            delete_file = raw_input("Delete this resource? Type 'yes' to delete: ")
            if delete_file == 'yes':
                py_pert.Remove(input_resource.GetUri())
                LOG(FATAL, 'please restart the flow...')

        start_time_sec = time.time()
        self.Run()
        end_time_sec = time.time()

        for output_name, output_resource in self.outputs.iteritems():
            if output_resource.flow != self:
                # Only the flow that created the output should generate a
                # provenance record for it; otherwise later flows may
                # overwrite the provenance info and lose the information
                # about the intermediate steps.
                LOG(
                    INFO,
                    "I don't own this output... not creating provenance record for it: %s"
                    % output_resource)
                continue

            record = deluge_pb2.ResourceProvenance()
            record.fingerprint = output_resource.GetFingerprint()
            record.flow = self.__class__.__name__
            record.name = output_name
            record.uri = output_resource.GetUri()
            record.start_time_sec = start_time_sec
            record.end_time_sec = end_time_sec
            record.input_fingerprints.extend(input_fingerprints)
            CHECK(record.IsInitialized())
            provenance.SetResourceProvenanceRecord(record)
        return