def fail(self, action):
    """Mark `action` as failed and remove it together with its dependents.

    The DAG is visited starting at `action` with `self._cb_remove` as the
    visitor. The visitor is handed a shared dict carrying the failed action
    and a 'deleted' list (presumably filled by `_cb_remove` with every
    action it removes -- confirm against that callback).

    Returns the 'deleted' list from that dict (a list, not a count).
    """
    log.debug('#### %s: FAIL %s' % (self.target, action))
    context = dict(failed_action=action, deleted=[])
    self.dag.visit(action, self._cb_remove, context)
    return context['deleted']
def _iterate(self): """ Run the actions listed in schedule on the items returned by _new_batch """ #log.debug('_iterate: oustanding=%s' % self.outstanding) #d if self.gameover: log.debug('_iterate: gameover') return action, task = self._get_action() if action: item, schedule = task['item'], task['schedule'] method, params = self.scripts[action] try: item_params = loads(item.params) # tmp bug fixing starts here for k in params.keys(): if params[k] == '' and (k in item_params[action]): params[k] = item_params[action][k] # tmp bug fixing ends here params.update(item_params.get('*', {})) x = re.compile('^[a-z_]+' ) # cut out digits from action name params.update(item_params.get(x.match(action).group(), {})) self.outstanding += 1 #params = {u'source_variant_name': u'original'} d = method(self.process.workspace, item.target_id, **params) except Exception, e: log.error('ERROR in %s: %s %s' % (str(method), type(e), str(e))) self._handle_err(str(e), item, schedule, action, params) else: d.addCallbacks(self._handle_ok, self._handle_err, callbackArgs=[item, schedule, action, params], errbackArgs=[item, schedule, action, params])
def fail(self, action):
    """Mark `action` as failed and remove it together with its dependents.

    The DAG is visited starting at `action` with `self._cb_remove` as the
    visitor. The visitor is handed a shared dict carrying the failed action
    and a 'deleted' list (presumably filled by `_cb_remove` with every
    action it removes -- confirm against that callback).

    Returns the 'deleted' list from that dict (a list, not a count).
    """
    log.debug('#### %s: FAIL %s' % (self.target, action))
    context = dict(failed_action=action, deleted=[])
    self.dag.visit(action, self._cb_remove, context)
    return context['deleted']
def get_cmdline(self):
    """Select executable, command line and parser for the source's extractor.

    The extractor type reported by `self.source.get_extractor()` picks the
    entry of `self.exe_list`, the `cmd_<type>` template and the
    `parse_<type>` attribute. The template is then filled in with the
    source uri as `infile`.

    Sets `self.remote_exe`, `self.cmdline` and `self.parser`; returns None.
    """
    kind = self.source.get_extractor()
    log.debug('######333\n########## ExtractBasic: using %s' % kind)

    self.remote_exe = self.exe_list[kind]
    self.cmdline = getattr(self, 'cmd_%s' % kind)
    self.parser = getattr(self, 'parse_%s' % kind)

    substitutions = {'infile': self.source.uri}
    self.cmdline = self.cmdline % substitutions
def _iterate(self): """ Run the actions listed in schedule on the items returned by _new_batch """ #log.debug('_iterate: oustanding=%s' % self.outstanding) #d if self.gameover: log.debug('_iterate: gameover') return action, task = self._get_action() if action: item, schedule = task['item'], task['schedule'] method, params = self.scripts[action] try: item_params = loads(item.params) # tmp bug fixing starts here for k in params.keys(): if params[k] == '' and (k in item_params[action]): params[k] = item_params[action][k] # tmp bug fixing ends here params.update(item_params.get('*', {})) x = re.compile('^[a-z_]+') # cut out digits from action name params.update(item_params.get(x.match(action).group(), {})) self.outstanding += 1 #params = {u'source_variant_name': u'original'} d = method(self.process.workspace, item.target_id, **params) except Exception, e: log.error('ERROR in %s: %s %s' % (str(method), type(e), str(e))) self._handle_err(str(e), item, schedule, action, params) else: d.addCallbacks(self._handle_ok, self._handle_err, callbackArgs=[item, schedule, action, params], errbackArgs=[item, schedule, action, params])
def parse_media_basic(self, result, filename):
    """Parse the extractor output for a media file and store its features.

    `result` is encoded as UTF-8 and fed to `parseString` together with a
    fresh `Parser`; whatever the parser exposes as `parsed` is handed to
    `_save_features` under the 'media_basic' label.

    Returns the string 'ok'.
    """
    log.debug('parse_media_basic: entering "%s"' % type(result))
    # The resolved path is not used further; the lookup is kept so that a
    # failure to resolve `filename` still surfaces here.
    str(self._fc.abspath(filename))

    handler = Parser()
    parseString(result.encode('utf-8'), handler)
    self._save_features(handler.parsed, 'media_basic')
    return 'ok'
def run(self):
    """Initialise the iteration state and schedule the first iteration.

    Creates the deferred that is handed back to the caller, stamps and
    saves the process start date, records on the process object how many
    ProcessTarget rows belong to it, clears the task list and asks the
    reactor to call `_iterate` as soon as possible.

    Returns the newly created deferred.
    """
    process = self.process
    log.debug('### Running process %s' % str(process.pk))

    self.deferred = defer.Deferred()

    process.start_date = datetime.datetime.now()
    process.save()
    # Assigned after save(): the count is only set on the in-memory object.
    own_targets = ProcessTarget.objects.filter(process=process)
    process.targets = own_targets.count()

    self.tasks = []
    reactor.callLater(0, self._iterate)
    return self.deferred
def handle_result(self, result, component):
    """Store the produced file name on `component` and fire the deferred.

    When `result` is non-empty, its final path element becomes
    `component.uri` and the component is saved; otherwise an error is
    logged. In both cases `self.deferred` is called back with `result`.
    """
    log.debug("handle_result %s" % str(result))
    log.debug("[save_component] component %s" % component.pk)

    if not result:
        log.error("Empty result passed to save_and_extract_features")
    else:
        # Only the file name is kept, the directory part is dropped.
        component.uri = os.path.basename(result)
        component.save()

    self.deferred.callback(result)
def run(self):
    """Initialise the iteration state and schedule the first iteration.

    Creates the deferred that is handed back to the caller, stamps and
    saves the process start date, records on the process object how many
    ProcessTarget rows belong to it, clears the task list and asks the
    reactor to call `_iterate` as soon as possible.

    Returns the newly created deferred.
    """
    process = self.process
    log.debug('### Running process %s' % str(process.pk))

    self.deferred = defer.Deferred()

    process.start_date = datetime.datetime.now()
    process.save()
    # Assigned after save(): the count is only set on the in-memory object.
    own_targets = ProcessTarget.objects.filter(process=process)
    process.targets = own_targets.count()

    self.tasks = []
    reactor.callLater(0, self._iterate)
    return self.deferred
def handle_result(self, result, component):
    """Store the produced file name on `component` and fire the deferred.

    When `result` is non-empty, its final path element becomes
    `component.uri` and the component is saved; otherwise an error is
    logged. In both cases `self.deferred` is called back with `result`.
    """
    log.debug('handle_result %s' % str(result))
    log.debug("[save_component] component %s" % component.pk)

    if not result:
        log.error('Empty result passed to save_and_extract_features')
    else:
        # Only the file name is kept, the directory part is dropped.
        component.uri = os.path.basename(result)
        component.save()

    self.deferred.callback(result)
def show(self):
    """Log a one-line summary of the schedule state for this target.

    Every action in `self.action_list` is rendered with a marker:
      k       no longer in `self.actions` (done, failed or cancelled)
      =       its index is in `self.ready`
      @       still tracked, not ready, empty wait list (running)
      <a-b>   waiting on the listed actions
    """
    pieces = ["sched %s: " % self.target]
    for name in self.action_list:
        if name not in self.actions:
            pieces.append("%s:k, " % name)  # done, failed or cancelled
            continue
        entry = self.actions[name]
        if entry['index'] in self.ready:
            pieces.append("%s:=, " % name)  # ready
        elif not entry['wlist']:
            pieces.append("%s:@, " % name)  # running
        else:
            pending = '-'.join(entry['wlist'])
            pieces.append("%s:<%s>, " % (name, pending))  # waiting
    log.debug(''.join(pieces))
def show(self):
    """Log a one-line summary of the schedule state for this target.

    Every action in `self.action_list` is rendered with a marker:
      k       no longer in `self.actions` (done, failed or cancelled)
      =       its index is in `self.ready`
      @       still tracked, not ready, empty wait list (running)
      <a-b>   waiting on the listed actions
    """
    pieces = ["sched %s: " % self.target]
    for name in self.action_list:
        if name not in self.actions:
            pieces.append("%s:k, " % name)  # done, failed or cancelled
            continue
        entry = self.actions[name]
        if entry['index'] in self.ready:
            pieces.append("%s:=, " % name)  # ready
        elif not entry['wlist']:
            pieces.append("%s:@, " % name)  # running
        else:
            pending = '-'.join(entry['wlist'])
            pieces.append("%s:<%s>, " % (name, pending))  # waiting
    log.debug(''.join(pieces))
def parse_doc_basic(self, result, filename):
    """Parse 'key: value' lines of document info and store the features.

    Each line of `result` containing a colon is split at the first colon
    into a stripped key and value; lines without a colon are ignored.
    Two derived entries are added: 'size' (first token of 'File size',
    -1 when absent, as a long) and 'pages' (copy of 'Pages').

    Returns the string 'ok'. Raises KeyError when no 'Pages' line exists.
    `filename` is not used.
    """
    log.debug('parse_doc_basic: entering')
    features = {}
    for row in result.split('\n'):
        key, colon, value = row.partition(':')
        if not colon:
            # no separator on this line
            continue
        features[key.strip()] = value.strip()

    size_tokens = features.get('File size', '-1').split()
    features['size'] = long(size_tokens[0])
    features['pages'] = features['Pages']
    self._save_features(features, 'doc_basic')
    return 'ok'
def parse_doc_basic(self, result, filename):
    """Parse 'key: value' lines of document info and store the features.

    Each line of `result` containing a colon is split at the first colon
    into a stripped key and value; lines without a colon are ignored.
    Two derived entries are added: 'size' (first token of 'File size',
    -1 when absent, as a long) and 'pages' (copy of 'Pages').

    Returns the string 'ok'. Raises KeyError when no 'Pages' line exists.
    `filename` is not used.
    """
    log.debug('parse_doc_basic: entering')
    features = {}
    for row in result.split('\n'):
        key, colon, value = row.partition(':')
        if not colon:
            # no separator on this line
            continue
        features[key.strip()] = value.strip()

    size_tokens = features.get('File size', '-1').split()
    features['size'] = long(size_tokens[0])
    features['pages'] = features['Pages']
    self._save_features(features, 'doc_basic')
    return 'ok'
def _get_action(self): """returns the first action found or None. Delete tasks with no actions left""" #log.debug("_get_action on num_tasks=%s" % len(self.tasks)) #d to_delete = [] action = '' for n in xrange(len(self.tasks)): idx = (self.cur_task + n) % len(self.tasks) task = self.tasks[idx] action = task['schedule'].action_to_run() if action is None: to_delete.append(task) elif action: break #log.debug('to_delete %s' % to_delete) #d for t in to_delete: #log.debug('deleting done target %s' % t['item'].target_id) #d self.tasks.remove(t) # update cur_task so that we do not always start querying the same task for new actions if action: idx = self.tasks.index(task) self.cur_task = (idx + 1) % len(self.tasks) else: self.cur_task = 0 # if action is None or empy there is no action ready to run # if there are new targets available try to read some and find some new action if action: return action, task else: if not self.all_targets_read and self.outstanding < self.max_outstanding: new_tasks = self._new_batch() if new_tasks: self.cur_task = len(self.tasks) self.tasks.extend(new_tasks) if self.all_targets_read and not self.tasks: log.debug("_get_action: gameover") self.stop() return None, None
def handle_result(self, result):
    """Record the generated output file and fire the deferred.

    The file name part of `self.out_file` is stored as the uri of the
    output component, which is saved; the item's update time is refreshed
    and the item saved; finally `self.deferred` is called back with
    `self.out_file`. `result` is only logged (its 'data' entry included).
    """
    log.debug('handle_result %s' % str(result))
    log.debug("[save_component] component %s" % self.out_comp.pk)
    log.debug('##############\n%s\n' % result['data'])

    component = self.out_comp
    component.uri = os.path.basename(self.out_file)
    component.save()

    owner = self.item
    owner.update_time = time.time()
    owner.save()

    self.deferred.callback(self.out_file)
class XMPEmbedderImpl: def __init__(self): self._fc = Storage() def metadata_synch(self, component_id, changes): # get xmp # the filename is needed because the extension is unknown, the following line of code is # tmp code because c.ID will include extension file (not only basename) # in the new MediaDART release #print 'MediaDART resource path: ', md_res_path try: myxmpfilename = str(self._fc.abspath(component_id)) except Exception, err: print '\n found some problems getting filename, err: ', err, '\n' xmpfile = XMPFiles(file_path=myxmpfilename, open_forupdate=files.XMP_OPEN_FORUPDATE) xmp = xmpfile.get_xmp() if not xmp: xmp = XMPMeta() for ns in changes.keys(): #print 'Property ', str(i[0]),':', str(i[1]) # first of all check if namespace str(i[0]) and property name str(i[1]) exist prefix = None try: prefix = xmp.get_prefix_for_namespace(str(ns)) except XMPError, err: print 'Error in get_prefix_for_namespace: ', err if prefix == None: #print 'prefix ', prefix[:-1] , ' does not exist.' try: log.debug('%s %s' % (str(ns), str(changes[ns]['prefix']))) res = xmp.register_namespace( str(ns), str(changes[ns]['prefix'])) # CHANGE ME #print 'register_namespace gave res = ', res except XMPError, err: print 'Error in register_namespace: ', err
def _save_metadata(self, features, ctype):
    """Map extracted features to MetadataValue objects for the source.

    For every non-empty feature the matching XMP properties for the
    source's media type are looked up, and one (unsaved) MetadataValue is
    built per known property. Height, width and file size are also copied
    onto the source component, which is saved at the end.

    Parameters:
      features -- dict of feature name -> value; '' and '0' are skipped.
      ctype    -- content type stored on each MetadataValue.

    Returns (metadata_list, delete_list): the MetadataValue objects built,
    and the MetadataProperty each of them refers to.
    """
    c = self.source
    # feature name -> list of (prefix, field[, struct prefix, struct field])
    xmp_metadata_commons = {'size': [('notreDAM', 'FileSize')]}
    xmp_metadata_audio = {
        'channels': [('xmpDM', 'audioChannelType')],
        'sample_rate': [('xmpDM', 'audioSampleRate')],
        'duration': [('notreDAM', 'Duration')],
    }
    xmp_metadata_video = {
        'height': [('xmpDM', 'videoFrameSize', 'stDim', 'h')],
        'width': [('xmpDM', 'videoFrameSize', 'stDim', 'w')],
        'r_frame_rate': [('xmpDM', 'videoFrameRate')],
        'bit_rate': [('xmpDM', 'fileDataRate')],
        'duration': [('notreDAM', 'Duration')],
    }
    xmp_metadata_image = {
        'height': [('tiff', 'ImageLength')],
        'width': [('tiff', 'ImageWidth')],
    }
    xmp_metadata_doc = {
        'pages': [('notreDAM', 'NPages')],
        'Copyright': [('dc', 'rights')],
    }
    xmp_metadata_image.update(xmp_metadata_commons)
    xmp_metadata_audio.update(xmp_metadata_commons)
    xmp_metadata_doc.update(xmp_metadata_commons)
    # video inherits the audio entries (and, through them, the common ones)
    xmp_metadata_video.update(xmp_metadata_audio)
    xmp_metadata = {
        'image': xmp_metadata_image,
        'video': xmp_metadata_video,
        'audio': xmp_metadata_audio,
        'doc': xmp_metadata_doc,
    }

    metadata_list = []
    delete_list = []
    media_type = c.media_type.name
    user = self.item.uploaded_by()
    metadata_default_language = get_metadata_default_language(user)

    for feature in features.keys():
        value = features[feature]
        if value == '' or value == '0':
            continue
        if feature == 'file_size':
            c.size = value
        if feature == 'height':
            c.height = value
        elif feature == 'width':
            c.width = value

        try:
            xmp_names = xmp_metadata[media_type][feature]
        except KeyError:
            # unknown media type or feature without an XMP mapping
            continue

        for m in xmp_names:
            try:
                ms = MetadataProperty.objects.get(namespace__prefix=m[0],
                                                  field_name=m[1])
            except Exception:
                # Exception rather than a bare except, so that
                # KeyboardInterrupt/SystemExit are not swallowed.
                log.debug('inside readfeatures, unknown metadata %s:%s ' % (m[0], m[1]))
                continue
            if ms.is_variant or c.variant.name == 'original':
                if len(m) == 4:
                    # property inside a structure
                    property_xpath = "%s:%s[1]/%s:%s" % (m[0], m[1], m[2], m[3])
                else:
                    property_xpath = ''
                try:
                    if ms.type == 'lang':
                        x = MetadataValue(schema=ms, object_id=c.pk,
                                          content_type=ctype,
                                          value=value,
                                          language=metadata_default_language,
                                          xpath=property_xpath)
                    else:
                        x = MetadataValue(schema=ms, object_id=c.pk,
                                          content_type=ctype,
                                          value=value,
                                          xpath=property_xpath)
                    metadata_list.append(x)
                    delete_list.append(ms)
                except Exception:
                    log.debug('inside readfeatures, could not get %s' % ms)
                    continue
    c.save()
    return (metadata_list, delete_list)
def end_test(result, process):
    """Finish the test run: flag the end, print stats, stop the reactor.

    Sets the module-level `gameover` flag, prints the statistics of
    `process`, logs `result` and schedules `reactor.stop` three seconds
    later.
    """
    global gameover
    gameover = True

    print_stats(process, False)
    log.debug('end of test %s' % result)

    shutdown_delay = 3  # seconds before the reactor is stopped
    reactor.callLater(shutdown_delay, reactor.stop)
xmp.set_property_float( str(ns), str(i), float(changes[ns]['fields'][i]['value'][0])) except XMPError, err: print 'Error in set_property_float: ', err elif changes[ns]['fields'][i]['type'] == 'long': try: xmp.set_property_long( str(ns), str(i), long(changes[ns]['fields'][i]['value'][0])) except XMPError, err: print 'Error in set_property_long: ', err else: try: log.debug( '%s %s %s' % (str(ns), str(i), str(changes[ns]['fields'][i]['value'][0]))) xmp.set_property( str(ns), str(i), str(changes[ns]['fields'][i]['value'][0])) except XMPError, err: print 'Error in set_property: ', err else: #print '**************** Property IS ARRAY ', str(i[0]),':', str(i[1]) , ' *****************************' if changes[ns]['fields'][i][ 'xpath'] != []: # so it is a structure print 'array of structures is not supported by xmplib' continue if property_exists == False: # if it is an array and the property does not exist, it must be created, otherwise, it will not be set.
def _save_metadata(self, features, ctype):
    """Map extracted features to MetadataValue objects for the source.

    For every non-empty feature the matching XMP properties for the
    source's media type are looked up, and one (unsaved) MetadataValue is
    built per known property. Height, width and file size are also copied
    onto the source component, which is saved at the end.

    Parameters:
      features -- dict of feature name -> value; '' and '0' are skipped.
      ctype    -- content type stored on each MetadataValue.

    Returns (metadata_list, delete_list): the MetadataValue objects built,
    and the MetadataProperty each of them refers to.
    """
    c = self.source
    # feature name -> list of (prefix, field[, struct prefix, struct field])
    xmp_metadata_commons = {'size': [('notreDAM', 'FileSize')]}
    xmp_metadata_audio = {
        'channels': [('xmpDM', 'audioChannelType')],
        'sample_rate': [('xmpDM', 'audioSampleRate')],
        'duration': [('notreDAM', 'Duration')],
    }
    xmp_metadata_video = {
        'height': [('xmpDM', 'videoFrameSize', 'stDim', 'h')],
        'width': [('xmpDM', 'videoFrameSize', 'stDim', 'w')],
        'r_frame_rate': [('xmpDM', 'videoFrameRate')],
        'bit_rate': [('xmpDM', 'fileDataRate')],
        'duration': [('notreDAM', 'Duration')],
    }
    xmp_metadata_image = {
        'height': [('tiff', 'ImageLength')],
        'width': [('tiff', 'ImageWidth')],
    }
    xmp_metadata_doc = {
        'pages': [('notreDAM', 'NPages')],
        'Copyright': [('dc', 'rights')],
    }
    xmp_metadata_image.update(xmp_metadata_commons)
    xmp_metadata_audio.update(xmp_metadata_commons)
    xmp_metadata_doc.update(xmp_metadata_commons)
    # video inherits the audio entries (and, through them, the common ones)
    xmp_metadata_video.update(xmp_metadata_audio)
    xmp_metadata = {
        'image': xmp_metadata_image,
        'video': xmp_metadata_video,
        'audio': xmp_metadata_audio,
        'doc': xmp_metadata_doc,
    }

    metadata_list = []
    delete_list = []
    media_type = c.media_type.name
    user = self.item.uploaded_by()
    metadata_default_language = get_metadata_default_language(user)

    for feature in features.keys():
        value = features[feature]
        if value == '' or value == '0':
            continue
        if feature == 'file_size':
            c.size = value
        if feature == 'height':
            c.height = value
        elif feature == 'width':
            c.width = value

        try:
            xmp_names = xmp_metadata[media_type][feature]
        except KeyError:
            # unknown media type or feature without an XMP mapping
            continue

        for m in xmp_names:
            try:
                ms = MetadataProperty.objects.get(namespace__prefix=m[0],
                                                  field_name=m[1])
            except Exception:
                # Exception rather than a bare except, so that
                # KeyboardInterrupt/SystemExit are not swallowed.
                log.debug('inside readfeatures, unknown metadata %s:%s ' % (m[0], m[1]))
                continue
            if ms.is_variant or c.variant.name == 'original':
                if len(m) == 4:
                    # property inside a structure
                    property_xpath = "%s:%s[1]/%s:%s" % (m[0], m[1], m[2], m[3])
                else:
                    property_xpath = ''
                try:
                    if ms.type == 'lang':
                        x = MetadataValue(schema=ms, object_id=c.pk,
                                          content_type=ctype,
                                          value=value,
                                          language=metadata_default_language,
                                          xpath=property_xpath)
                    else:
                        x = MetadataValue(schema=ms, object_id=c.pk,
                                          content_type=ctype,
                                          value=value,
                                          xpath=property_xpath)
                    metadata_list.append(x)
                    delete_list.append(ms)
                except Exception:
                    log.debug('inside readfeatures, could not get %s' % ms)
                    continue
    c.save()
    return (metadata_list, delete_list)
def _read_xmp_features(self, features):
    """Turn extracted XMP features into unsaved MetadataValue objects.

    Parameters:
      features -- dict mapping a namespace uri to a sequence of
                  (xpath, value, options) entries; `options` is indexed
                  with 'IS_QUALIFIER'.

    Entries whose namespace or property is unknown, or whose value is
    blank, are skipped. An xml:lang qualifier entry does not create a
    value: it sets the language of the value already created for the
    qualified xpath ('x-default' is replaced by the uploader's default
    metadata language).

    Returns (metadata_list, delete_list): the values built, and the
    non-array properties found. When `features` is not a dict the item
    state is set to 1, the item saved, and two empty lists are returned.
    """
    xpath = re.compile(r'(?P<prefix>\w+):(?P<property>\w+)(?P<array_index>\[\d+\]){,1}')
    ctype = ContentType.objects.get_for_model(self.item)
    ctype_component = ContentType.objects.get_for_model(self.component)
    user = self.item.uploaded_by()
    metadata_default_language = get_metadata_default_language(user)
    metadata_dict = {}
    metadata_list = []
    delete_list = []
    log.debug('READ XMP FEATURES')

    if not isinstance(features, dict):
        # Was a bare `item`, which is not defined in this scope.
        self.item.state = 1
        self.item.save()
        return [], []

    for feature in features.keys():
        try:
            namespace_obj = XMPNamespace.objects.get(uri=feature)
        except Exception as e:
            log.error('####### Error: unknown namespace %s: %s' % (feature, str(e)))
            continue
        metadata_dict[namespace_obj] = {}
        namespace_properties = MetadataProperty.objects.filter(namespace=namespace_obj)

        for property_values in features[feature]:
            property_xpath = property_values[0]
            property_value = property_values[1]
            property_options = property_values[2]
            xpath_splitted = xpath.findall(property_xpath)
            metadata_property = xpath_splitted[0][1].strip()
            found_property = namespace_properties.filter(
                field_name__iexact=metadata_property)
            if found_property.count() == 0 or len(property_value.strip()) == 0:
                continue

            prop = found_property[0]
            if prop.is_array == 'not_array':
                delete_list.append(prop)

            if property_options['IS_QUALIFIER'] and xpath_splitted[-1][1] == 'lang':
                # Language qualifier: attach it to the value it qualifies.
                find_xpath = property_xpath.replace('/?xml:lang', '')
                if find_xpath in metadata_dict[namespace_obj]:
                    if property_value == 'x-default':
                        property_value = metadata_default_language
                    metadata_dict[namespace_obj][find_xpath].language = property_value
                else:
                    log.debug('metadata property not found: ' + find_xpath)
            else:
                # Variant properties belong to the component, the others
                # to the item.
                if prop.is_variant:
                    x = MetadataValue(schema=prop,
                                      object_id=self.component.pk,
                                      content_type=ctype_component,
                                      value=property_value,
                                      xpath=property_xpath)
                else:
                    x = MetadataValue(schema=prop,
                                      object_id=self.item.pk,
                                      content_type=ctype,
                                      value=property_value,
                                      xpath=property_xpath)
                metadata_dict[namespace_obj][property_xpath] = x
                metadata_list.append(x)

    # Same shape as the early exit above.
    return metadata_list, delete_list
elif x.xpath == 'exif:GPSLongitude': longitude = x.value x.save() except Exception, e: log.error('Error in %s: %s %s' % (self.__class__.__name__, type(e), str(e))) self.deferred.errback(e) return if latitude != None and longitude != None: try: GeoInfo.objects.save_geo_coords(self.component.item, latitude, longitude) except Exception, ex: log.debug( 'ex while saving latitude and longitude in dam db: %s' % ex) self.deferred.callback('ok') def extract_xmp(self): d = self.proxy.extract(self.component.uri) d.addCallbacks(self._cb_xmp_ok, self._cb_error) return d def test(): print 'test' item = Item.objects.get(pk=1) workspace = DAMWorkspace.objects.get(pk=1) d = run(
def done(self, action):
    """Record that `action` has completed.

    The action is dropped from `self.actions` (KeyError when it is not
    there), then the DAG is visited from it with `_cb_set_to_ready`,
    which receives the completed action as user data.
    """
    message = '#### target %s: done %s' % (self.target, action)
    log.debug(message)
    del self.actions[action]
    self.dag.visit(action, self._cb_set_to_ready, action)
def _read_xmp_features(self, features):
    """Turn extracted XMP features into unsaved MetadataValue objects.

    Parameters:
      features -- dict mapping a namespace uri to a sequence of
                  (xpath, value, options) entries; `options` is indexed
                  with 'IS_QUALIFIER'.

    Entries whose namespace or property is unknown, or whose value is
    blank, are skipped. An xml:lang qualifier entry does not create a
    value: it sets the language of the value already created for the
    qualified xpath ('x-default' is replaced by the uploader's default
    metadata language).

    Returns (metadata_list, delete_list): the values built, and the
    non-array properties found. When `features` is not a dict the item
    state is set to 1, the item saved, and two empty lists are returned.
    """
    xpath = re.compile(r"(?P<prefix>\w+):(?P<property>\w+)(?P<array_index>\[\d+\]){,1}")
    ctype = ContentType.objects.get_for_model(self.item)
    ctype_component = ContentType.objects.get_for_model(self.component)
    user = self.item.uploaded_by()
    metadata_default_language = get_metadata_default_language(user)
    metadata_dict = {}
    metadata_list = []
    delete_list = []
    log.debug("READ XMP FEATURES")

    if not isinstance(features, dict):
        # Was a bare `item`, which is not defined in this scope.
        self.item.state = 1
        self.item.save()
        return [], []

    for feature in features.keys():
        try:
            namespace_obj = XMPNamespace.objects.get(uri=feature)
        except Exception as e:
            log.error("####### Error: unknown namespace %s: %s" % (feature, str(e)))
            continue
        metadata_dict[namespace_obj] = {}
        namespace_properties = MetadataProperty.objects.filter(namespace=namespace_obj)

        for property_values in features[feature]:
            property_xpath = property_values[0]
            property_value = property_values[1]
            property_options = property_values[2]
            xpath_splitted = xpath.findall(property_xpath)
            metadata_property = xpath_splitted[0][1].strip()
            found_property = namespace_properties.filter(
                field_name__iexact=metadata_property)
            if found_property.count() == 0 or len(property_value.strip()) == 0:
                continue

            prop = found_property[0]
            if prop.is_array == "not_array":
                delete_list.append(prop)

            if property_options["IS_QUALIFIER"] and xpath_splitted[-1][1] == "lang":
                # Language qualifier: attach it to the value it qualifies.
                find_xpath = property_xpath.replace("/?xml:lang", "")
                if find_xpath in metadata_dict[namespace_obj]:
                    if property_value == "x-default":
                        property_value = metadata_default_language
                    metadata_dict[namespace_obj][find_xpath].language = property_value
                else:
                    log.debug("metadata property not found: " + find_xpath)
            else:
                # Variant properties belong to the component, the others
                # to the item.
                if prop.is_variant:
                    x = MetadataValue(
                        schema=prop,
                        object_id=self.component.pk,
                        content_type=ctype_component,
                        value=property_value,
                        xpath=property_xpath,
                    )
                else:
                    x = MetadataValue(
                        schema=prop,
                        object_id=self.item.pk,
                        content_type=ctype,
                        value=property_value,
                        xpath=property_xpath,
                    )
                metadata_dict[namespace_obj][property_xpath] = x
                metadata_list.append(x)

    # Same shape as the early exit above.
    return metadata_list, delete_list
for x in xmp_metadata_list: if x.xpath == "exif:GPSLatitude": latitude = x.value elif x.xpath == "exif:GPSLongitude": longitude = x.value x.save() except Exception, e: log.error("Error in %s: %s %s" % (self.__class__.__name__, type(e), str(e))) self.deferred.errback(e) return if latitude != None and longitude != None: try: GeoInfo.objects.save_geo_coords(self.component.item, latitude, longitude) except Exception, ex: log.debug("ex while saving latitude and longitude in dam db: %s" % ex) self.deferred.callback("ok") def extract_xmp(self): d = self.proxy.extract(self.component.uri) d.addCallbacks(self._cb_xmp_ok, self._cb_error) return d def test(): print "test" item = Item.objects.get(pk=1) workspace = DAMWorkspace.objects.get(pk=1) d = run(4, workspace.pk, source_variant_name=["original"]) print "addBoth"
def handle_result(self, result, *args):
    """Parse the remote command output and fire the deferred with it.

    `result['data']` and any extra `args` go to `self.parse_stdout`; its
    return value is passed to the deferred's callback. Any exception is
    logged and passed to the deferred's errback instead.
    """
    try:
        parsed = self.parse_stdout(result['data'], *args)
        self.deferred.callback(parsed)
    except Exception as e:
        details = (self.__class__.__name__, type(e), str(e))
        log.error('Error in %s: %s %s' % details)
        self.deferred.errback(e)


def handle_error(self, result):
    """Fail the deferred with the error message carried by `result`."""
    message = result.getErrorMessage()
    self.deferred.errback(Failure(Exception(message)))


def execute(self, **params):
    """Build the command line and run it through the remote proxy.

    `params` are forwarded to `self.get_cmdline`. A failure while building
    or splitting the command line is logged and sent to the deferred's
    errback. In fake mode the arguments are only logged; otherwise the
    command is issued via `Proxy(self.md_server)` with `handle_result` and
    `handle_error` attached to the call.

    Returns `self.deferred`.
    """
    # get basic data (avoid creating stuff in DB)
    try:
        self.get_cmdline(**params)
        args = splitstring(self.cmdline)
    except Exception as e:
        details = (self.__class__.__name__, type(e), str(e))
        log.error('Error in %s: %s %s' % details)
        self.deferred.errback(e)
    else:
        if not self.fake:
            remote = Proxy(self.md_server)
            pending = remote.call(self.remote_exe, args, self.env)
            pending.addCallbacks(self.handle_result, self.handle_error,
                                 callbackArgs=self.cb_args)
        else:
            log.debug('######### Command line:\n%s' % str(args))
    return self.deferred  # if executed stand alone
xmp.set_property_int(str(ns), str(i), int(changes[ns]["fields"][i]["value"][0])) except XMPError, err: print "Error in set_property_int: ", err elif changes[ns]["fields"][i]["type"] == "float": try: xmp.set_property_float(str(ns), str(i), float(changes[ns]["fields"][i]["value"][0])) except XMPError, err: print "Error in set_property_float: ", err elif changes[ns]["fields"][i]["type"] == "long": try: xmp.set_property_long(str(ns), str(i), long(changes[ns]["fields"][i]["value"][0])) except XMPError, err: print "Error in set_property_long: ", err else: try: log.debug("%s %s %s" % (str(ns), str(i), str(changes[ns]["fields"][i]["value"][0]))) xmp.set_property(str(ns), str(i), str(changes[ns]["fields"][i]["value"][0])) except XMPError, err: print "Error in set_property: ", err else: # print '**************** Property IS ARRAY ', str(i[0]),':', str(i[1]) , ' *****************************' if changes[ns]["fields"][i]["xpath"] != []: # so it is a structure print "array of structures is not supported by xmplib" continue if property_exists == False: # if it is an array and the property does not exist, it must be created, otherwise, it will not be set. try: if ( changes[ns]["fields"][i]["is_array"] == "alt" and changes[ns]["fields"][i]["type"] == "lang"
self.item.update_time = time.time() self.item.save() self.deferred.callback(self.out_file) def handle_error(self, result): self.deferred.errback(Failure(Exception(result.getErrorMessage()))) def execute(self, output_variant_name, output_type, **params): # get basic data (avoid creating stuff in DB) try: self.get_cmdline(output_variant_name, output_type, **params) output_variant = Variant.objects.get(name=output_variant_name) self.out_comp = self.item.create_variant(output_variant, self.workspace, self.out_type) self.out_comp.source = self.source args = splitstring(self.cmdline) except Exception, e: log.error('Error in %s: %s %s' % (self.__class__.__name__, type(e), str(e))) self.deferred.errback(e) else: if self.fake: log.debug('######### Command line:\n%s' % str(args)) else: proxy = Proxy(self.md_server) d = proxy.call(self.remote_exe, args, self.env) d.addCallbacks(self.handle_result, self.handle_error) return self.deferred # if executed stand alone