def process_METS(self, filename, created_by, expid=None):
    """Ingest a METS XML file: create the Experiment, its Datasets and
    Dataset_Files, and store any parameters described by registered schemas."""
    logger.debug('START EXP: ' + str(expid))

    url = 'http://www.example.com'
    self.url = 'http://www.example.com'

    with open(filename, 'r') as f:
        xmlString = f.read()

    ep = ExperimentParser(str(xmlString))
    del xmlString

    e = Experiment(
        id=expid,
        url=url,
        approved=True,
        title=ep.getTitle(),
        institution_name=ep.getAgentName('DISSEMINATOR'),
        description=ep.getAbstract(),
        created_by=created_by,
    )
    e.save()

    # Only needed by the commented-out 'file://' handling further below.
    url_path = self.url.rpartition('/')[0] + self.url.rpartition('/')[1]

    # Drop any existing author links before re-creating them in METS order.
    Author_Experiment.objects.filter(experiment=e).delete()

    for order, authorName in enumerate(ep.getAuthors()):
        author_experiment = Author_Experiment(experiment=e,
                                              author=SafeUnicode(authorName),
                                              order=order)
        author_experiment.save()

    # Reload all the datasets from scratch.
    e.dataset_set.all().delete()

    # For each dataset...
    for dmdid in ep.getDatasetDMDIDs():
        d = Dataset(experiment=e,
                    description=ep.getDatasetTitle(dmdid))
        d.save()

        # For each metadata element of this dataset...
        for admid in ep.getDatasetADMIDs(dmdid):
            techxml = ep.getTechXML(admid)
            prefix = techxml.getroot().prefix
            xmlns = techxml.getroot().nsmap[prefix]

            try:
                schema = Schema.objects.get(namespace__exact=xmlns)
                parameternames = ParameterName.objects.filter(
                    schema__namespace__exact=schema.namespace)
                parameternames = parameternames.order_by('id')

                for pn in parameternames:
                    if pn.is_numeric:
                        value = ep.getParameterFromTechXML(techxml, pn.name)
                        if value is not None:
                            dp = DatasetParameter(
                                dataset=d,
                                name=pn,
                                string_value=None,
                                numerical_value=float(value))
                            dp.save()
                    else:
                        dp = DatasetParameter(
                            dataset=d,
                            name=pn,
                            string_value=ep.getParameterFromTechXML(
                                techxml, pn.name),
                            numerical_value=None)
                        dp.save()
            except Schema.DoesNotExist:
                logger.debug('Schema ' + xmlns + " doesn't exist!")

        # For each file in the dataset...
        for fileid in ep.getFileIDs(dmdid):
            # if ep.getFileLocation(fileid).startswith('file://'):
            #     absolute_filename = url_path + \
            #         ep.getFileLocation(fileid).partition('//')[2]
            # else:
            #     absolute_filename = ep.getFileLocation(fileid)....
            if self.null_check(ep.getFileName(fileid)):
                filename = ep.getFileName(fileid)
            else:
                filename = ep.getFileLocation(fileid).rpartition('/')[2]

            url = ep.getFileLocation(fileid)
            protocol = url.partition('://')[0]

            datafile = Dataset_File(dataset=d,
                                    filename=filename,
                                    url=url,
                                    size=ep.getFileSize(fileid),
                                    protocol=protocol)
            datafile.save()

            # For each metadata element of this file...
            for admid in ep.getFileADMIDs(fileid):
                techxml = ep.getTechXML(admid)
                prefix = techxml.getroot().prefix
                xmlns = techxml.getroot().nsmap[prefix]

                try:
                    schema = Schema.objects.get(namespace__exact=xmlns)
                    parameternames = ParameterName.objects.filter(
                        schema__namespace__exact=schema.namespace)
                    parameternames = parameternames.order_by('id')

                    for pn in parameternames:
                        if pn.is_numeric:
                            value = ep.getParameterFromTechXML(techxml,
                                                               pn.name)
                            if value is not None:
                                dp = DatafileParameter(
                                    dataset_file=datafile,
                                    name=pn,
                                    string_value=None,
                                    numerical_value=float(value))
                                dp.save()
                        else:
                            dp = DatafileParameter(
                                dataset_file=datafile,
                                name=pn,
                                string_value=ep.getParameterFromTechXML(
                                    techxml, pn.name),
                                numerical_value=None)
                            dp.save()
                except Schema.DoesNotExist:
                    # No registered schema for this namespace: keep the raw
                    # metadata XML alongside the datafile instead.
                    xml_data = XML_data(datafile=datafile,
                                        xmlns=SafeUnicode(xmlns),
                                        data=SafeUnicode(techxml.getvalue()))
                    xml_data.save()

    logger.debug('DONE EXP: ' + str(e.id))
    return e.id
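
# Hypothetical usage sketch (class and variable names below are assumptions,
# not part of the original module): given an instance of the ingest class this
# method belongs to, a METS file is loaded and the new experiment id returned:
#
#     ingester = MetsExperimentIngester()  # assumed class name
#     exp_id = ingester.process_METS('/data/mets.xml', created_by=request.user)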