def build(self):
    """Finalize the accumulated object state and build the Object.

    Synthesizes a CREATION Date only when both date bounds were collected,
    then wraps each non-empty accumulator list in its corresponding *Set
    element on the underlying object builder.
    """
    if self.start_date_bound is not None and self.end_date_bound is not None:
        self.dates.append(
            Date.Builder()
            .setEarliestDate(self.start_date_bound)
            .setLatestDate(self.end_date_bound)
            .setType(DateType.CREATION)
            .build()
        )
    # Each *Set wrapper is only set when there is at least one element,
    # leaving absent sections unset on the builder.
    if len(self.agents) > 0:
        self._object_builder.setAgents(AgentSet.Builder().setElements(ImmutableList.copyOf(self.agents)).build())
    if len(self.dates) > 0:
        self._object_builder.setDates(DateSet.Builder().setElements(ImmutableList.copyOf(self.dates)).build())
    if len(self.descriptions) > 0:
        self._object_builder.setDescriptions(DescriptionSet.Builder().setElements(ImmutableList.copyOf(self.descriptions)).build())
    # Images are stored as a plain immutable list, not a *Set wrapper.
    if len(self.images) > 0:
        self._object_builder.setImages(ImmutableList.copyOf(self.images))
    if len(self.locations) > 0:
        self._object_builder.setLocations(LocationSet.Builder().setElements(ImmutableList.copyOf(self.locations)).build())
    if len(self.rights) > 0:
        self._object_builder.setRights(RightsSet.Builder().setElements(ImmutableList.copyOf(self.rights)).build())
    if len(self.subjects) > 0:
        self._object_builder.setSubjects(SubjectSet.Builder().setElements(ImmutableList.copyOf(self.subjects)).build())
    if len(self.textrefs) > 0:
        self._object_builder.setTextrefs(TextrefSet.Builder().setElements(ImmutableList.copyOf(self.textrefs)).build())
    if len(self.titles) > 0:
        self._object_builder.setTitles(TitleSet.Builder().setElements(ImmutableList.copyOf(self.titles)).build())
    if len(self.work_types) > 0:
        self._object_builder.setWorkTypes(WorkTypeSet.Builder().setElements(ImmutableList.copyOf(self.work_types)).build())
    return self._object_builder.build()
# NOTE(review): machine-translated from Java and left collapsed on one physical line.
# It appears to topologically order strata, merging mutually-dependent strata when a
# cycle is detected (downstreamStrata acting as the recursion stack). It cannot run
# as-is: `toMerge` is referenced but never defined in this scope, and the placement of
# the early `return` relative to `downstreamStrata.add(curStratum)` cannot be recovered
# from this flattened form -- TODO: repair against the original Java source.
def addOrMergeStratumAndAncestors(cls, curStratum, ordering, toAdd, strataDependencyGraph, downstreamStrata): """ generated source for method addOrMergeStratumAndAncestors """ if downstreamStrata.contains(curStratum): mergeStrata(Sets.newHashSet(toMerge), toAdd, strataDependencyGraph) return downstreamStrata.add(curStratum) for parent in ImmutableList.copyOf(strataDependencyGraph.get(curStratum)): # We could merge away the parent here, so we protect against CMEs and # make sure the parent is still in toAdd before recursing. if toAdd.contains(parent): cls.addOrMergeStratumAndAncestors(parent, ordering, toAdd, strataDependencyGraph, downstreamStrata) downstreamStrata.remove(curStratum) # - If we've added all our parents, we will still be in toAdd # and none of our dependencies will be in toAdd. Add to the ordering. # - If there was a merge upstream that we weren't involved in, # we will still be in toAdd, but we will have (possibly new) # dependencies that are still in toAdd. Do nothing. # - If there was a merge upstream that we were involved in, # we won't be in toAdd anymore. Do nothing. if not toAdd.contains(curStratum): return for parent in strataDependencyGraph.get(curStratum): if toAdd.contains(parent): return ordering.add(curStratum) toAdd.remove(curStratum)
# NOTE(review): generated/decompiled constructor (FunctionInfoImpl). For each slot i of
# the sentence form it appears to test whether the relation is functional in that slot
# (the other slots' values determine slot i's value), recording a value map per
# functional slot. It cannot run as-is: `tuplePart`, `tuple_`, `functionMap` and
# `functional` are used without ever being initialized here -- the per-iteration setup
# was lost in translation from Java. TODO: restore from the original source.
def __init__(self, form, trueSentences): """ generated source for method __init__ """ super(FunctionInfoImpl, self).__init__() self.form = form numSlots = form.getTupleSize() i = 0 while i < numSlots: # We want to establish whether or not this is a constant... for sentence in trueSentences: ConcurrencyUtils.checkForInterruption() tuplePart.addAll(tuple_.subList(0, i)) tuplePart.addAll(tuple_.subList(i + 1, len(tuple_))) if functionMap.containsKey(tuplePart): # We have two tuples with different values in just this slot functional = False break # Otherwise, we record it functionMap.put(ImmutableList.copyOf(tuplePart), tuple_.get(i)) if functional: # Record the function self.dependentSlots.add(True) self.valueMaps.add(functionMap) else: # Forget it self.dependentSlots.add(False) self.valueMaps.add(None) i += 1
# NOTE(review): generated code (FullSentenceFormDomain factory). Intended behavior
# appears to be: build one constant-set per slot of the form, populated from the
# sentences in `domain`. It cannot run as-is: `tuple_` and `constant` are undefined,
# and the inner `while i < len(tuple_)` loop reuses `i` without resetting it per
# sentence, so at most the first sentence could populate the slot domains.
# TODO: repair against the original Java source.
def create(cls, form, domain): """ generated source for method create """ domainsForSlotsBuilder = Lists.newArrayList() i = 0 while i < form.getTupleSize(): domainsForSlotsBuilder.add(ImmutableSet.builder()) i += 1 for sentence in domain: assert form.matches(sentence) if len(tuple_) != form.getTupleSize(): raise IllegalArgumentException() while i < len(tuple_): domainsForSlotsBuilder.get(i).add(constant) i += 1 domainsForSlots = ImmutableList.builder() for builder in domainsForSlotsBuilder: domainsForSlots.add(builder.build()) return FullSentenceFormDomain(form, ImmutableList.copyOf(domain), domainsForSlots.build())
def applyAmbiguitiesToRules(cls, description, ambiguitiesByOriginalForm, model):
    """Return a copy of the description with every rule expanded through
    applyAmbiguities; non-rule GDL elements are passed through unchanged."""
    rewritten = ImmutableList.builder()
    for element in description:
        if not isinstance(element, (GdlRule, )):
            rewritten.add(element)
        else:
            rewritten.addAll(applyAmbiguities(element, ambiguitiesByOriginalForm, model))
    return rewritten.build()
def create(cls, description):
    """Derive an ImmutableSentenceFormModel from a GDL game description.

    Computes the sentence forms, per-form rules and true sentences, the
    form dependency graph, and the constant/independent form subsets.
    """
    rules = ImmutableList.copyOf(description)
    forms = getSentenceForms(rules)
    rules_by_form = getRulesByForm(rules, forms)
    true_by_form = getTrueSentencesByForm(rules, forms)
    graph = getDependencyGraph(forms, rules_by_form)
    constant_forms = getConstantSentenceForms(forms, graph)
    independent_forms = getIndependentSentenceForms(forms, graph)
    return ImmutableSentenceFormModel(
        rules,
        forms,
        constant_forms,
        independent_forms,
        graph,
        rules_by_form,
        true_by_form,
    )
def getObjectsByCollectionId(self, collection_id, logger, log_marker):
    """Walk <data_dir>/record and map every record file to an Object.

    Files missing an .xml suffix are renamed on disk to carry one before
    being mapped.
    """
    mapped = []
    record_root = os.path.join(self.__data_dir_path, "record")
    for dir_path, _, names in os.walk(record_root):
        for name in names:
            path = os.path.join(dir_path, name)
            if not path.endswith(".xml"):
                # Normalize the extension so downstream tooling sees XML files.
                renamed = path + ".xml"
                os.rename(path, renamed)
                path = renamed
            mapped.append(self.__map_oai_pmh_record(collection_id=collection_id, file_path=path))
    return ImmutableList.copyOf(mapped)
# NOTE(review): generated from Java (GGP-Base BasesInputsValidator-style check). It
# simulates random playouts for a time budget and verifies every reached true/legal
# sentence is covered by the declared base/input propositions. It cannot run as-is:
# `GdlPool.getRelation(self.LEGAL, [None]*)` is a syntax error (a Java varargs call
# lost its arguments in translation), and `sm`, `bases`, `inputs`, `truesFromBases`,
# `legalsFromInputs`, `startTime`, `state`, `initialState`, `legalMoves`,
# `legalSentences`, `missingBases` and `missingInputs` are never initialized here.
# TODO: repair against the original Java source.
def checkValidity(self, theGame): """ generated source for method checkValidity """ try: sm.initialize(theGame.getRules()) if len(bases) == 0: raise ValidatorException("Could not find base propositions.") elif len(inputs) == 0: raise ValidatorException("Could not find input propositions.") for base in bases: truesFromBases.add(GdlPool.getRelation(self.TRUE, base.getBody())) for input in inputs: legalsFromInputs.add(GdlPool.getRelation(self.LEGAL, input.getBody())) if truesFromBases.isEmpty() and legalsFromInputs.isEmpty(): return ImmutableList.of() while System.currentTimeMillis() < startTime + self.millisecondsToTest: # Check state against bases, inputs if not truesFromBases.isEmpty(): if not truesFromBases.containsAll(state.getContents()): missingBases.addAll(state.getContents()) missingBases.removeAll(truesFromBases) raise ValidatorException("Found missing bases: " + missingBases) if not legalsFromInputs.isEmpty(): for role in sm.getRoles(): for move in legalMoves: legalSentences.add(GdlPool.getRelation(self.LEGAL, [None]*)) if not legalsFromInputs.containsAll(legalSentences): missingInputs.addAll(legalSentences) missingInputs.removeAll(legalsFromInputs) raise ValidatorException("Found missing inputs: " + missingInputs) state = sm.getRandomNextState(state) if sm.isTerminal(state): state = initialState except MoveDefinitionException as mde: raise ValidatorException("Could not find legal moves while simulating: " + mde) except TransitionDefinitionException as tde: raise ValidatorException("Could not find transition definition while simulating: " + tde) except RuntimeException as e: raise ValidatorException("Ran into a runtime exception while simulating: " + e) except StackOverflowError as e: raise ValidatorException("Ran into a stack overflow while simulating: " + e) except OutOfMemoryError as e: raise ValidatorException("Ran out of memory while simulating: " + e) return ImmutableList.of()
def _map_omeka_collections(self, institution_id, omeka_collections):
    """Map each Omeka collection to the model type, dropping unmappable ones."""
    mapped = [
        self._map_omeka_collection(
            institution_id=institution_id,
            omeka_collection=omeka_collection,
        )
        for omeka_collection in omeka_collections
    ]
    # _map_omeka_collection signals "skip" by returning None.
    return ImmutableList.copyOf([c for c in mapped if c is not None])
def getObjectsByCollectionId(self, collection_id, logger, log_marker):
    """Fetch every Omeka item in the collection and map each to an Object,
    resolving the item's files through the API client."""
    omeka_collection = int(collection_id.getUnqualifiedCollectionId())
    mapped = [
        self._map_omeka_item(
            collection_id=collection_id,
            omeka_item=item,
            omeka_item_files=self.__api_client.get_all_files(item=item.id),
        )
        for item in self.__api_client.get_all_items(collection=omeka_collection)
    ]
    return ImmutableList.copyOf(mapped)
def _map_omeka_item_element_itm_accession_number(self, object_builder, text):
    """Record an accession number as a repository Location refid."""
    accession_refid = (
        LocationRefid.builder()
        .setText(text)
        .setType(LocationRefidType.ACCESSION)
        .build()
    )
    repository_location = (
        Location.builder()
        .setRefids(ImmutableList.of(accession_refid))
        .setType(LocationType.REPOSITORY)
        .build()
    )
    object_builder.locations.append(repository_location)
def _map_omeka_item_element_dc_rights(self, object_builder, text):
    """Map a DC Rights element to an UNDETERMINED Rights entry on the object.

    The rights holder is deliberately left unset here (it was commented out
    in the original mapping).
    """
    rights = (
        Rights.builder()
        .setText(text)
        .setType(RightsType.UNDETERMINED)
        .build()
    )
    rights_set = (
        RightsSet.builder()
        .setElements(ImmutableList.of(rights))
        .build()
    )
    object_builder.setRights(rights_set)
def copyOf(cls, other):
    """ generated source for method copyOf """
    # Fast paths: a domain model wraps a form model -- copy that instead;
    # an already-immutable form model can be returned as-is.
    if isinstance(other, (ImmutableSentenceDomainModel, )):
        return cls.copyOf((other).getFormModel())
    elif isinstance(other, (ImmutableSentenceFormModel, )):
        return other
    # Otherwise, snapshot the per-form rules and listed-true sentences into
    # immutable multimaps and copy every other component defensively.
    rulesByForm = ImmutableSetMultimap.builder()
    trueSentencesByForm = ImmutableSetMultimap.builder()
    for form in other.getSentenceForms():
        rulesByForm.putAll(form, other.getRules(form))
        trueSentencesByForm.putAll(form, other.getSentencesListedAsTrue(form))
    return ImmutableSentenceFormModel(ImmutableList.copyOf(other.getDescription()), ImmutableSet.copyOf(other.getSentenceForms()), ImmutableSet.copyOf(other.getConstantSentenceForms()), ImmutableSet.copyOf(other.getIndependentSentenceForms()), ImmutableSetMultimap.copyOf(other.getDependencyGraph()), rulesByForm.build(), trueSentencesByForm.build())
def build(self):
    """Assemble closures, components, colors and structures onto the object
    builder, then delegate to the shared OmekaResourceMapper builder.

    NOTE(review): indentation reconstructed from a collapsed one-line form;
    statement nesting chosen to match the apparent intent -- confirm against
    the original file.
    """
    # Pair up closure placements with closure types. Counts must either match
    # 1:1, or one side must be a single value applied to every element of the
    # other; anything else is logged and dropped.
    if len(self.closure_placements) > 0 and len(self.closure_types) > 0:
        closures = []
        if len(self.closure_placements) == len(self.closure_types):
            for closure_placement, closure_type in zip(self.closure_placements, self.closure_types):
                closures.append(
                    Closure.builder()
                    .setPlacement(closure_placement)
                    .setType(closure_type)
                    .build()
                )
        elif len(self.closure_placements) == 1:
            for closure_type in self.closure_types:
                closures.append(
                    Closure.builder()
                    .setPlacement(self.closure_placements[0])
                    .setType(closure_type)
                    .build()
                )
        elif len(self.closure_types) == 1:
            for closure_placement in self.closure_placements:
                closures.append(
                    Closure.builder()
                    .setPlacement(closure_placement)
                    .setType(self.closure_types[0])
                    .build()
                )
        else:
            self.__logger.warn("item %d has different numbers of closure placements and closure types: %d vs. %d", self.omeka_item.id, len(self.closure_placements), len(self.closure_types))
        if len(closures) > 0:
            self._object_builder.setClosures(ClosureSet.builder().setElements(ImmutableList.copyOf(closures)).build())
    # Attach per-letter structures (and per-extent structures keyed by the
    # component's term text) to their components; anything left unclaimed in
    # the structure maps refers to an unknown component/extent and is logged.
    if len(self.component_builders_by_letter) > 0:
        components = []
        for letter, component_builder in self.component_builders_by_letter.iteritems():
            structures = self.structures_by_component_letter.pop(letter, [])
            structures.extend(self.structures_by_extent.pop(component_builder.term.text, []))
            if len(structures) > 0:
                component_builder.setStructures(StructureSet.builder().setElements(ImmutableList.copyOf(structures)).build())
            component = component_builder.build()
            components.append(component)
        for letter in self.structures_by_component_letter.iterkeys():
            self.__logger.warn("structure(s) specified for unknown component %s on item %d", letter, self.omeka_item.id)
        for letter in self.structures_by_extent.iterkeys():
            self.__logger.warn("structure(s) specified for unknown extent %s on item %d", letter, self.omeka_item.id)
        self._object_builder.setComponents(ComponentSet.builder().setElements(ImmutableList.copyOf(components)).build())
    if len(self.colors) > 0:
        self._object_builder.setColors(ColorSet.builder().setElements(ImmutableList.copyOf(self.colors)).build())
    if len(self.structures) > 0:
        self._object_builder.setStructures(StructureSet.builder().setElements(ImmutableList.copyOf(self.structures)).build())
    return OmekaResourceMapper._ObjectBuilder.build(self)
# NOTE(review): generated code. It appears to build, per sentence form, a Cartesian
# sentence-form domain from the sample terms observed for that form. It cannot run
# as-is: `sentencesModel`, `nameAndArity`, `form`, `sampleTerms` and `domain` are
# never defined in this scope (the per-entry unpacking and domain construction were
# lost in translation from Java). TODO: restore from the original source.
def getCartesianDomainsFromModel(self): """ generated source for method getCartesianDomainsFromModel """ results = Maps.newHashMap() for sentenceEntry in sentencesModel.entrySet(): ConcurrencyUtils.checkForInterruption() # We'll end up taking the Cartesian product of the different # types of terms we have available if nameAndArity.getArity() == 0: results.put(form, CartesianSentenceFormDomain.create(form, ImmutableList.of())) else: for terms in Sets.cartesianProduct(sampleTerms): ConcurrencyUtils.checkForInterruption() results.put(form, domain) return results
def _map_omeka_item_element_itm_label(self, object_builder, text):
    """Add the item's label text as a LABEL inscription, ignoring blanks.

    Leading/trailing single quotes and whitespace are stripped first.
    """
    cleaned = text.strip("'").strip()
    if not cleaned:
        return
    label_text = (
        InscriptionText.builder()
        .setText(cleaned)
        .setType(InscriptionTextType.LABEL)
        .build()
    )
    object_builder.inscriptions.append(
        Inscription.builder()
        .setTexts(ImmutableList.of(label_text))
        .build()
    )
def mergeStrata(cls, toMerge, toAdd, strataDependencyGraph):
    """ generated source for method mergeStrata """
    # Replace the strata in toMerge with their union, rewriting both the keys
    # and the values of the dependency graph to reference the merged stratum.
    # NOTE(review): indentation reconstructed from a collapsed one-line form;
    # `parents` is undefined here -- presumably it should be the old stratum's
    # parent set from strataDependencyGraph, lost in translation. TODO confirm.
    newStratum = ImmutableSet.copyOf(Iterables.concat(toMerge))
    for oldStratum in toMerge:
        toAdd.remove(oldStratum)
    toAdd.add(newStratum)
    # Change the keys
    for oldStratum in toMerge:
        strataDependencyGraph.putAll(newStratum, parents)
        strataDependencyGraph.removeAll(oldStratum)
    # Change the values (iterate a copy so we can mutate the multimap safely)
    for entry in ImmutableList.copyOf(strataDependencyGraph.entries()):
        if toMerge.contains(entry.getValue()):
            strataDependencyGraph.remove(entry.getKey(), entry.getValue())
            strataDependencyGraph.put(entry.getKey(), newStratum)
def _map_omeka_item_element_dc_subject(self, object_builder, text):
    """Split a semicolon-delimited DC Subject element into Subject entries,
    recording each used term in the vocabulary-usage tracker."""
    for raw_subject in text.split(';'):
        cleaned = raw_subject.strip()
        if not cleaned:
            continue
        term = (
            SubjectTerm.builder()
            .setText(cleaned)
            .setType(SubjectTermType.OTHER_TOPIC)
            .build()
        )
        object_builder.subjects.append(
            Subject.builder()
            .setTerms(ImmutableList.of(term))
            .build()
        )
        self._update_vocabulary_used('Dublin Core', 'Subject', cleaned)
# NOTE(review): generated code (AssignmentFunction factory). It appears to locate the
# rightmost variable in the conjunct, look up its functional value map, and classify
# each remaining term as a constant input or an index into the variable ordering.
# It cannot run as-is: `term` is never bound inside the while loop (the
# `terms.get(i)` lookup was lost in translation), `internalFunctions` is created but
# never populated, the `varIndex == -1` case only prints (Python 2 print statements)
# and falls through, and string-concatenating non-str objects would raise TypeError.
# TODO: repair against the original Java source.
def create(cls, conjunct, functionInfo, rightmostVar, varOrder, preassignment): """ generated source for method create """ # We have to set up the things mentioned above... internalFunctions = ArrayList() # We can traverse the conjunct for the list of variables/constants... terms = ArrayList() gatherVars(conjunct.getBody(), terms) # Note that we assume here that the var of interest only # appears once in the relation... varIndex = terms.indexOf(rightmostVar) if varIndex == -1: print "conjunct is: " + conjunct print "terms are: " + terms print "righmostVar is: " + rightmostVar terms.remove(rightmostVar) function_ = functionInfo.getValueMap(varIndex) # Set up inputs and such, using terms querySize = len(terms) isInputConstant = ArrayList(len(terms)) queryConstants = Maps.newHashMap() queryInputIndices = ArrayList(len(terms)) i = 0 while i < len(terms): if isinstance(term, (GdlConstant, )): isInputConstant.add(True) queryConstants.put(i, term) queryInputIndices.add(-1) elif isinstance(term, (GdlVariable, )): # Is it in the head assignment? if preassignment.containsKey(term): isInputConstant.add(True) queryConstants.put(i, preassignment.get(term)) queryInputIndices.add(-1) else: isInputConstant.add(False) # queryConstants.add(null); # What value do we put here? # We want to grab some value out of the # input tuple, which uses functional ordering # Index of the relevant variable, by the # assignment's ordering queryInputIndices.add(varOrder.indexOf(term)) i += 1 return AssignmentFunction(ImmutableList.copyOf(internalFunctions), querySize, ImmutableList.copyOf(isInputConstant), ImmutableMap.copyOf(queryConstants), ImmutableList.copyOf(queryInputIndices), ImmutableMap.copyOf(function_))
# NOTE(review): generated code. For each still-functional slot it appears to update
# that slot's value map with the lookup tuple (the sentence tuple minus the slot),
# invalidating the slot when two tuples disagree on the slot's value. It cannot run
# as-is: `lookupTuple`, `curValue`, `newValue` and `valueMap` are used without being
# initialized (per-iteration setup lost in translation from Java). TODO: restore
# from the original source.
def addTuple(self, sentenceTuple): """ generated source for method addTuple """ if len(sentenceTuple) != self.form.getTupleSize(): raise IllegalArgumentException() # For each slot... i = 0 while i < len(sentenceTuple): if self.dependentSlots.get(i): # Either add to that entry, or invalidate the slot lookupTuple.remove(i) if curValue == None: # Just add to the map valueMap.put(ImmutableList.copyOf(lookupTuple), newValue) else: # If this isn't the existing sentence, invalidate this slot if curValue != newValue: self.dependentSlots.set(i, False) self.valueMaps.set(i, ImmutableMap.of()) i += 1
def _parse_record_metadata_subject_element(self, element, object_builder):
    """Parse an UNTL subject metadata element into a Subject on the builder.

    Whitespace-only elements are skipped. When the element carries a
    qualifier attribute that names a known Vocab member, a VocabRef is
    attached to the subject term; a couple of known unmapped qualifiers
    are silently ignored, and anything else is logged.
    """
    text = element.text.strip()
    if len(text) == 0:
        return
    # BUG FIX: the original chain ended with a trailing line-continuation
    # backslash after .setType(...), splicing the following `qualifier = ...`
    # assignment into the builder expression (a syntax error).
    subject_term_builder = \
        SubjectTerm.Builder()\
        .setText(text)\
        .setType(SubjectTermType.OTHER_TOPIC)
    qualifier = element.get('qualifier', None)
    if qualifier is not None:
        try:
            vocab = getattr(Vocab, qualifier)
            subject_term_builder.setVocabRef(VocabRef.Builder().setVocab(vocab).build())
        except AttributeError:
            # These qualifiers are expected not to map to a Vocab member.
            if qualifier not in ('named_person', 'UNTL-BS',):
                self._logger.warn("unknown subject vocabulary '%s'", qualifier)
    object_builder.subjects.append(
        Subject.Builder()
        .setTerms(ImmutableList.of(subject_term_builder.build()))
        .build()
    )
def put_institution(institution_id, institution_title, institution_url, store_parameters, collection_store_uri=None, data_rights=None):
    """Register an institution via the institution command service.

    When no data_rights are supplied, a default copyright statement for the
    current year, held by the institution, is synthesized.
    """
    if data_rights is None:
        default_rights = (
            Rights.Builder()
            .setRightsHolder(institution_title)
            .setText("Copyright %s %s" % (datetime.now().year, institution_title))
            .setType(RightsType.COPYRIGHTED)
            .build()
        )
        data_rights = (
            RightsSet.Builder()
            .setElements(ImmutableList.of(default_rights))
            .build()
        )
    institution = (
        Institution.Builder()
        .setCollectionStoreUri(Optional.fromNullable(collection_store_uri))
        .setDataRights(data_rights)
        .setStoreParameters(store_parameters)
        .setTitle(institution_title)
        .setUrl(institution_url)
        .build()
    )
    PythonApi.getInstance().getInstitutionCommandService().putInstitution(institution_id, institution)
# NOTE(review): generated/decompiled constructor (AssignmentsImpl) spanning four
# collapsed physical lines. It appears to compute a variable iteration ordering for a
# GDL rule body, set up per-variable value domains, convert source conjuncts into
# tuple iterators, and wire up assignment functions and distinct constraints. Large
# parts are unrunnable as translated: loop bodies reference names that are never
# bound per iteration (`conjunctTuple`, `term`, `varIndex`, `constraintSlots`,
# `constraintValues`, `varsChosen`, `putDontCheck`, `sentences`, `tupleAssignment`,
# `c`, `s`, `longTuple`, `shortTuple`, `slot`, `value`, `tuples`,
# `functionalConjunct`, `conjForm`, `varsInSentence`, `remainingVarsInSentence`,
# `index`, `nextRightmostVar`), `bestOrdering = IterationOrderCandidate()` is
# immediately overwritten, and several inner `continue`/`while` constructs lost
# their enclosing loops. Left byte-identical pending recovery of the original
# Java source; do not attempt piecemeal fixes here.
def __init__(self, headAssignment, rule, varDomains, functionInfoMap, completedSentenceFormValues): """ generated source for method __init__ """ super(AssignmentsImpl, self).__init__() self.empty = False self.headAssignment = headAssignment # We first have to find the remaining variables in the body self.varsToAssign = GdlUtils.getVariables(rule) # Remove all the duplicates; we do, however, want to keep the ordering newVarsToAssign = ArrayList() for v in varsToAssign: if not newVarsToAssign.contains(v): newVarsToAssign.add(v) self.varsToAssign = newVarsToAssign self.varsToAssign.removeAll(headAssignment.keySet()) # varsToAssign is set at this point # We see if iterating over entire tuples will give us a # better result, and we look for the best way of doing that. # Let's get the domains of the variables # Map<GdlVariable, Set<GdlConstant>> varDomains = model.getVarDomains(rule); # Since we're looking at a particular rule, we can do this one step better # by looking at the domain of the head, which may be more restrictive # and taking the intersections of the two domains where applicable # Map<GdlVariable, Set<GdlConstant>> headVarDomains = model.getVarDomainsInSentence(rule.getHead()); # We can run the A* search for a good set of source conjuncts # at this point, then use the result to build the rest. completedSentenceFormSizes = HashMap() if completedSentenceFormValues != None: for form in completedSentenceFormValues.keySet(): completedSentenceFormSizes.put(form, completedSentenceFormValues.get(form).size()) varDomainSizes = HashMap() for var in varDomains.keySet(): varDomainSizes.put(var, varDomains.get(var).size()) bestOrdering = IterationOrderCandidate() bestOrdering = getBestIterationOrderCandidate(rule, varDomains, functionInfoMap, completedSentenceFormSizes, headAssignment, False)# model, # TODO: True here? 
# Want to replace next few things with order # Need a few extra things to handle the use of iteration over existing tuples self.varsToAssign = bestOrdering.getVariableOrdering() # For each of these vars, we have to find one or the other. # Let's start by finding all the domains, a task already done. self.valuesToIterate = Lists.newArrayListWithCapacity(len(self.varsToAssign)) for var in varsToAssign: if varDomains.containsKey(var): if not varDomains.get(var).isEmpty(): self.valuesToIterate.add(ImmutableList.copyOf(varDomains.get(var))) else: self.valuesToIterate.add(ImmutableList.of(GdlPool.getConstant("0"))) else: self.valuesToIterate.add(ImmutableList.of(GdlPool.getConstant("0"))) # Okay, the iteration-over-domain is done. # Now let's look at sourced iteration. self.sourceDefiningSlot = ArrayList(len(self.varsToAssign)) i = 0 while i < len(self.varsToAssign): self.sourceDefiningSlot.add(-1) i += 1 # We also need to convert values into tuples # We should do so while constraining to any constants in the conjunct # Let's convert the conjuncts sourceConjuncts = bestOrdering.getSourceConjuncts() self.tuplesBySource = Lists.newArrayListWithCapacity(len(sourceConjuncts)) # new ArrayList<List<List<GdlConstant>>>(len(sourceConjuncts)); self.varsChosenBySource = Lists.newArrayListWithCapacity(len(sourceConjuncts)) # new ArrayList<List<Integer>>(len(sourceConjuncts)); self.putDontCheckBySource = Lists.newArrayListWithCapacity(len(sourceConjuncts)) # new ArrayList<List<Boolean>>(len(sourceConjuncts)); j = 0 while j < len(sourceConjuncts): # flatten into a tuple # Go through the vars/constants in the tuple while i < len(conjunctTuple): if isinstance(term, (GdlConstant, )): constraintSlots.add(i) constraintValues.add(term) # TODO: What if tuple size ends up being 0? 
# Need to keep that in mind elif isinstance(term, (GdlVariable, )): varsChosen.add(varIndex) if self.sourceDefiningSlot.get(varIndex) == -1: # We define it self.sourceDefiningSlot.set(varIndex, j) putDontCheck.add(True) else: # It's an overlap; we just check for consistency putDontCheck.add(False) else: raise RuntimeException("Function returned in tuple") i += 1 self.varsChosenBySource.add(ImmutableList.copyOf(varsChosen)) self.putDontCheckBySource.add(ImmutableList.copyOf(putDontCheck)) # Now we put the tuples together # We use constraintSlots and constraintValues to check that the # tuples have compatible values for sentence in sentences: # Check that it doesn't conflict with our headAssignment if not headAssignment.isEmpty(): for var in headAssignment.keySet(): if tupleAssignment.containsKey(var) and tupleAssignment.get(var) != headAssignment.get(var): continue while c < len(constraintSlots): if not longTuple.get(slot) == value: continue c += 1 while s < len(longTuple): # constraintSlots is sorted in ascending order if c < len(constraintSlots) and constraintSlots.get(c) == s: c += 1 else: shortTuple.add(longTuple.get(s)) s += 1 # The tuple fits the source conjunct tuples.add(ImmutableList.copyOf(shortTuple)) # sortTuples(tuples); //Needed? Useful? Not sure. Probably not? self.tuplesBySource.add(ImmutableList.copyOf(tuples)) j += 1 # We now want to see which we can give assignment functions to self.valuesToCompute = ArrayList(len(self.varsToAssign)) for var in varsToAssign: self.valuesToCompute.add(None) self.indicesToChangeWhenNull = ArrayList(len(self.varsToAssign)) i = 0 while i < len(self.varsToAssign): # Change itself, why not? 
# Actually, instead let's try -1, to catch bugs better self.indicesToChangeWhenNull.add(-1) i += 1 # Now we have our functions already selected by the ordering # bestOrdering.functionalConjunctIndices; # Make AssignmentFunctions out of the ordering functionalConjuncts = bestOrdering.getFunctionalConjuncts() # print "functionalConjuncts: " + functionalConjuncts; i = 0 while i < len(functionalConjuncts): if functionalConjunct != None: # These are the only ones that could be constant functions if functionInfoMap != None: functionInfo = functionInfoMap.get(conjForm) if functionInfo != None: # Now we need to figure out which variables are involved # and which are suitable as functional outputs. # 1) Which vars are in this conjunct? # 2) Of these vars, which is "rightmost"? # 3) Is it only used once in the relation? if Collections.frequency(varsInSentence, rightmostVar) != 1: continue # Can't use it # 4) Which slot is it used in in the relation? # 5) Build an AssignmentFunction if appropriate. # This should be able to translate from values of # the other variables to the value of the wanted # variable. # We don't guarantee that this works until we check if not function_.functional(): continue self.valuesToCompute.set(index, function_) remainingVarsInSentence.remove(rightmostVar) self.indicesToChangeWhenNull.set(index, self.varsToAssign.indexOf(nextRightmostVar)) i += 1 # We now have the remainingVars also assigned their domains # We also cover the distincts here # Assume these are just variables and constants self.distincts = ArrayList() for literal in rule.getBody(): if isinstance(literal, (GdlDistinct, )): self.distincts.add(literal) computeVarsToChangePerDistinct() # Need to add "distinct" restrictions to head assignment, too... checkDistinctsAgainstHead()
def _parse_record_metadata_coverage_element(self, element, object_builder):
    """Parse an UNTL coverage element (date bounds or place name) onto the builder.

    NOTE(review): indentation reconstructed from a collapsed form; nesting
    chosen to match the apparent intent -- confirm against the original file.
    """
    text = element.text.strip()
    if len(text) == 0:
        return
    qualifier = element.attrib.get('qualifier', None)
    if qualifier is None:
        self._logger.warn("coverage element without qualifier on record %s", object_builder.record_identifier)
        return
    if qualifier == 'date':
        pass  # Same as date element
    elif qualifier == 'eDate':
        # End of the record's date range; a range inside an eDate is suspicious.
        if object_builder.end_date_bound is not None:
            self._logger.warn("record %s has multiple eDate's", object_builder.record_identifier)
        earliest_date, latest_date = self.__parse_date_range(text)
        if earliest_date != latest_date:
            self._logger.warn("record %s has a eDate range: %s", object_builder.record_identifier, text)
        object_builder.end_date_bound = latest_date
    elif qualifier == 'placeName':
        # Place names arrive as ' - '-delimited parts from broadest (nation)
        # to narrowest (inhabited place); intermediate extents depend on
        # whether the nation is the United States.
        text_parts = [text_part.strip() for text_part in text.split(' - ')]
        self._logger.debug('place name from record %s: %s', object_builder.record_identifier, text)
        location_names_by_extent = {}
        location_names_by_extent['nation'] = nation = text_parts.pop(0)
        if len(text_parts) > 0:
            text_part = text_parts.pop(0)
            if nation == 'United States':
                extent = 'state'
            else:
                extent = 'region (geographic)'
            location_names_by_extent[extent] = text_part
        if len(text_parts) >= 2:
            text_part = text_parts.pop(0)
            if nation == 'United States':
                extent = 'county'
            else:
                extent = 'province'
            location_names_by_extent[extent] = text_part
        if len(text_parts) == 2:
            # 'Millburn Township', "Short Hills'
            # 'New York City', 'Brooklyn Borough'
            text_part = text_parts.pop(0)
            assert nation == 'United States', text_parts
            location_names_by_extent['township'] = text_part
        if len(text_parts) > 0:
            location_names_by_extent['inhabited place'] = text_parts.pop(0)
        assert len(text_parts) == 0, text
        # Track which names we've seen per extent across the whole feed.
        # NOTE(review): this loop rebinds `text`/`extent`, shadowing the outer
        # names -- intentional-looking but worth confirming.
        for extent, text in location_names_by_extent.iteritems():
            location_names_temp = self.__location_names_by_extent.setdefault(extent, [])
            if not text in location_names_temp:
                location_names_temp.append(text)
        object_builder.locations.append(
            Location.Builder()
            .setNames(ImmutableList.copyOf(
                LocationName.Builder()
                .setExtent(extent)
                .setText(text)
                .setType(LocationNameType.GEOGRAPHIC)
                .build()
                for extent, text in location_names_by_extent.iteritems()
            ))
            .setType(LocationType.OTHER)
            .build()
        )
    elif qualifier == 'sDate':
        # Start of the record's date range; a range inside an sDate is suspicious.
        if object_builder.start_date_bound is not None:
            self._logger.warn("record %s has multiple sDate's", object_builder.record_identifier)
        earliest_date, latest_date = self.__parse_date_range(text)
        if earliest_date != latest_date:
            self._logger.warn("record %s has a sDate range: %s", object_builder.record_identifier, text)
        object_builder.start_date_bound = earliest_date
    else:
        self._logger.warn("unknown coverage qualifier '%s' on record %s", qualifier, object_builder.record_identifier)
def build(self):
    """Assemble all accumulated Omeka item state into the final Object.

    Titles and work types are mandatory -- a ValueError is raised when either
    is missing. NOTE(review): indentation reconstructed from a collapsed
    three-line form; nesting chosen to match the apparent intent (in
    particular, the description-dedup index increment is placed in the else
    branch so deletion does not skip elements) -- confirm against the
    original file.
    """
    if len(self.agents) > 0:
        self._object_builder.setAgents(AgentSet.builder().setElements(ImmutableList.copyOf(self.agents)).build())
    # Categories are deduplicated via set() before copying.
    if len(self.categories) > 0:
        self._object_builder.setCategories(ImmutableList.copyOf(set(self.categories)))
    if len(self.cultural_contexts) > 0:
        self._object_builder.setCulturalContexts(CulturalContextSet.builder().setElements(ImmutableList.copyOf(self.cultural_contexts)).build())
    # The DC date only becomes a Date when both bounds were parsed; a 'circa'
    # certainty marks both bounds as approximate.
    if self.dc_date_builder.getEarliestDate() is not None and self.dc_date_builder.getLatestDate() is not None:
        if self.dc_date_certainty is not None:
            assert self.dc_date_certainty == 'circa'
            self.dc_date_builder.setEarliestDate(
                DateBound.builder(self.dc_date_builder.earliestDate)
                .setCirca(True)
                .build()
            )
            self.dc_date_builder.setLatestDate(
                DateBound.builder(self.dc_date_builder.latestDate)
                .setCirca(True)
                .build()
            )
        self.dates.append(self.dc_date_builder.build())
    if len(self.dates) > 0:
        self._object_builder.setDates(DateSet.builder().setElements(ImmutableList.copyOf(self.dates)).build())
    # Deduplicate descriptions by text, preserving first occurrence.
    if len(self.descriptions) > 0:
        description_texts = {}
        description_i = 0
        while description_i < len(self.descriptions):
            description = self.descriptions[description_i]
            if description.text in description_texts:
                del self.descriptions[description_i]
            else:
                description_texts[description.text] = None
                description_i = description_i + 1
        self._object_builder.setDescriptions(DescriptionSet.builder().setElements(ImmutableList.copyOf(self.descriptions)).build())
    # Every identifier becomes a catalog textref.
    for identifier in self.identifiers:
        self.textrefs.append(
            Textref.builder()
            .setName(
                TextrefName.builder()
                .setText("Identifier")
                .setType(TextrefNameType.CATALOG)
                .build()
            )
            .setRefid(
                TextrefRefid.builder()
                .setText(identifier)
                .setType(TextrefRefidType.OTHER)
                .build()
            )
            .build()
        )
    if len(self.images) > 0:
        self._object_builder.setImages(ImmutableList.copyOf(self.images))
    if len(self.inscriptions) > 0:
        self._object_builder.setInscriptions(InscriptionSet.builder().setElements(ImmutableList.copyOf(self.inscriptions)).build())
    if len(self.locations) > 0:
        self._object_builder.setLocations(LocationSet.builder().setElements(ImmutableList.copyOf(self.locations)).build())
    if len(self.materials) > 0:
        self._object_builder.setMaterials(MaterialSet.builder().setElements(ImmutableList.copyOf(self.materials)).build())
    if len(self.measurements) > 0:
        self._object_builder.setMeasurements(MeasurementsSet.builder().setElements(ImmutableList.copyOf(self.measurements)).build())
    # Deduplicate relations by (text, type); duplicates are logged and dropped.
    if len(self.relation_builders) > 0:
        unique_relation_builders_by_text = {}
        for relation_builder in self.relation_builders:
            assert relation_builder.text is not None
            existing_relation_builders = unique_relation_builders_by_text.setdefault(relation_builder.text, [])
            unique = True
            for existing_relation_builder in existing_relation_builders:
                if existing_relation_builder.type == relation_builder.type:
                    unique = False
                    break
            if unique:
                existing_relation_builders.append(relation_builder)
            else:
                self.__logger.warn("item %d has duplicate relation type=%s, text=%s", self.__omeka_item.id, relation_builder.type, relation_builder.text)
        unique_relation_builders = []
        for relation_builders in unique_relation_builders_by_text.values():
            unique_relation_builders.extend(relation_builders)
        self._object_builder.setRelations(
            RelationSet.builder()
            .setElements(ImmutableList.copyOf(relation_builder.build() for relation_builder in unique_relation_builders))
            .build())
    if len(self.subjects) > 0:
        self._object_builder.setSubjects(SubjectSet.builder().setElements(ImmutableList.copyOf(self.subjects)).build())
    if len(self.techniques) > 0:
        self._object_builder.setTechniques(TechniqueSet.builder().setElements(ImmutableList.copyOf(self.techniques)).build())
    if len(self.textrefs) > 0:
        self._object_builder.setTextrefs(TextrefSet.builder().setElements(ImmutableList.copyOf(self.textrefs)).build())
    if len(self.titles) > 0:
        self._object_builder.setTitles(TitleSet.builder().setElements(ImmutableList.copyOf(self.titles)).build())
    else:
        raise ValueError('no titles')
    if len(self.work_types) > 0:
        self._object_builder.setWorkTypes(WorkTypeSet.builder().setElements(ImmutableList.copyOf(self.work_types)).build())
    else:
        raise ValueError('no work types')
    return self._object_builder.build()
def getSentences(cls, rule, includeHead):
    """Return the sentences of *rule*: its body, optionally preceded by its head.

    When includeHead is VarDomainOpts.INCLUDE_HEAD, the head sentence is
    prepended (lazily, via Iterables.concat) to the body; otherwise only the
    body is returned.

    generated source for method getSentences
    """
    body = rule.getBody()
    if includeHead != cls.VarDomainOpts.INCLUDE_HEAD:
        return body
    head_only = ImmutableList.of(rule.getHead())
    return Iterables.concat(head_only, body)
.setStoreParameters(store_parameters) .setTitle(institution_title) .setUrl(institution_url) .build() ) data_dir_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'data', 'extracted')) assert os.path.isdir(data_dir_path) put_institution( data_rights=\ RightsSet.Builder() .setElements(ImmutableList.of( Rights.Builder() .setRightsHolder('University of North Texas') .setText("The contents of Texas Fashion Collection, hosted by the University of North Texas Libraries (digital content including images, text, and sound and video recordings) are made publicly available by the collection-holding partners for use in research, teaching, and private study. For the full terms of use, see http://digital.library.unt.edu/terms-of-use/") .setType(RightsType.COPYRIGHTED) .build() )) .build(), institution_id=InstitutionId.parse('untvca'), institution_title='Texas Fashion Collection', institution_url=Url.parse('http://digital.library.unt.edu/explore/collections/TXFC/'), store_parameters=ImmutableMap.of( 'record_mapper', TxfcOaiPmhRecordMapper.__module__ + '.' + TxfcOaiPmhRecordMapper.__name__ # @UndefinedVariable ), ) put_collection( collection_id=CollectionId.parse('untvca/txfc'), institution_id=InstitutionId.parse('untvca'), object_store_uri=Uri.parse(OaiPmhFsObjectStore.URI_SCHEME + ':/' + os.path.join(data_dir_path, 'untvca', 'txfc').replace(os.path.sep, '/')),
def map_omeka_item(self, collection_id, endpoint_url, omeka_item, omeka_item_files, square_thumbnail_height_px, square_thumbnail_width_px):
    """
    Map one Omeka item (plus its files) to an ObjectEntry.

    The item's Dublin Core "Title" becomes a feature value; the feature name is
    looked up from self.OMEKA_COLLECTIONS by the item's Omeka collection id.
    Each image file becomes an Image with original and square-thumbnail
    versions and per-file rights; files missing provenance, license, or a
    square thumbnail are skipped with a warning.

    NOTE: this is Python 2 code (`iteritems()`, `urllib.quote`, `logger.warn`).

    :param collection_id: qualified collection id; used to derive the object id
        and institution id
    :param endpoint_url: unused in this method
    :param omeka_item: Omeka item with .id and .element_texts
    :param omeka_item_files: iterable of Omeka files with .mime_type,
        .element_texts, and .file_urls
    :param square_thumbnail_height_px: declared pixel height of square thumbnails
    :param square_thumbnail_width_px: declared pixel width of square thumbnails
    :return: ObjectEntry(object_id, object_)
    """
    # NOTE(review): this assignment is dead — object_id is unconditionally
    # reassigned from feature_value just before the return below.
    object_id = ObjectId.parse(str(collection_id) + '/' + str(omeka_item.id))
    # All structures produced here are typed against the Costume Core vocabulary.
    vocab_ref = VocabRef.Builder().setVocab(Vocab.COSTUME_CORE).build()
    # Reverse-lookup the feature name for this Omeka collection id.
    feature_name = None
    omeka_collection_id = int(collection_id.getUnqualifiedCollectionId())
    for item in self.OMEKA_COLLECTIONS.iteritems():
        if item[1] == omeka_collection_id:
            feature_name = item[0]
            break
    assert feature_name is not None
    feature_value = None
    item_image_credit_line = item_image_license = None
    # Scan item-level element texts: first non-empty Dublin Core Title wins as
    # the feature value; image credit/license act as per-file fallbacks.
    for element_text in omeka_item.element_texts:
        if len(element_text.text) == 0:
            continue
        if element_text.element_set.name == 'Dublin Core':
            if element_text.element.name == 'Title':
                if feature_value is None:
                    feature_value = element_text.text
        elif element_text.element_set.name == 'Item Type Metadata':
            if element_text.element.name == 'Image Creator':
                item_image_credit_line = element_text.text
            elif element_text.element.name == 'Image License':
                item_image_license = element_text.text
        else:
            self._logger.warn("Omeka item %d has unknown element set name '%s'", omeka_item.id, element_text.element_set.name)
    object_builder = \
        Object.Builder()\
            .setCollectionId(collection_id)\
            .setHidden(True)\
            .setInstitutionId(collection_id.getInstitutionId())\
            .setStructures(\
                StructureSet.Builder().setElements(ImmutableList.of(
                    Structure.Builder()
                        .setText(feature_value)
                        .setType(
                            StructureType.Builder()
                                .setText(feature_name)
                                .setVocabRef(vocab_ref)
                                .build()
                        )
                        .build()
                ))
                .build()
            )\
            .setTitles(
                TitleSet.Builder().setElements(ImmutableList.of(
                    Title.Builder()
                        # NOTE(review): "%(feature_value)s" % locals() is just
                        # str(feature_value) written obscurely.
                        .setText("%(feature_value)s" % locals())
                        .setType(TitleType.DESCRIPTIVE)
                        .build()
                ))
                .build()
            )\
            .setViewType(ViewType.DETAIL)
    images = []
    for file_ in omeka_item_files:
        # Only image files contribute; everything else is silently ignored.
        if not file_.mime_type.startswith('image/'):
            continue
        # Start from the item-level values; file-level Dublin Core
        # License/Provenance override them.
        image_credit_line = item_image_credit_line
        image_license = item_image_license
        for element_text in file_.element_texts:
            if element_text.element_set.name == 'Dublin Core':
                if element_text.element.name == 'License':
                    image_license = element_text.text
                elif element_text.element.name == 'Provenance':
                    image_credit_line = element_text.text
        if image_credit_line is None or len(image_credit_line) == 0:
            self._logger.warn("Omeka item %d has a file %d missing a Provenance", omeka_item.id, file_.id)
            continue
        if image_license is None or len(image_license) == 0:
            self._logger.warn("Omeka item %d has a file %d missing a License", omeka_item.id, file_.id)
            continue
        # Translate the free-text license into a RightsType and, where it is a
        # recognized Creative Commons license, a vocabulary reference/URI.
        license_vocab_ref = None
        if image_license.lower() == 'public domain':
            rights_type = RightsType.PUBLIC_DOMAIN
        elif image_license == 'CC0':
            rights_type = RightsType.LICENSED
            license_vocab_ref = \
                VocabRef.Builder()\
                    .setVocab(Vocab.CREATIVE_COMMONS)\
                    .setUri(Uri.parse('https://creativecommons.org/publicdomain/zero/1.0/'))\
                    .build()
        elif image_license.startswith('CC BY-SA '):
            rights_type = RightsType.LICENSED
            version = image_license[len('CC BY-SA '):]
            # Validation only: raises ValueError if the version suffix is not
            # numeric; the float result is deliberately discarded.
            float(version)
            license_vocab_ref = \
                VocabRef.Builder()\
                    .setVocab(Vocab.CREATIVE_COMMONS)\
                    .setUri(Uri.parse("https://creativecommons.org/licenses/by-sa/%s/" % version))\
                    .build()
        else:
            # Unrecognized license text: keep it verbatim as LICENSED rights.
            rights_type = RightsType.LICENSED
        image_builder = Image.Builder()  # @UndefinedVariable
        file_urls = file_.file_urls
        image_builder.setOriginal(ImageVersion.Builder().setUrl(Url.parse(file_urls.original)).build())
        image_builder.setRights(
            RightsSet.Builder().setElements(ImmutableList.of(
                Rights.Builder()
                    .setLicenseVocabRef(Optional.fromNullable(license_vocab_ref))
                    .setRightsHolder(image_credit_line)
                    .setText(image_license)
                    .setType(rights_type)
                    .build()
            ))
            .build()
        )
        # A square thumbnail is mandatory; drop the file (after the rights work
        # above) if Omeka did not produce one.
        if file_urls.square_thumbnail is None:
            self._logger.warn("Omeka item %d has a file %d missing a square thumbnail", omeka_item.id, file_.id)
            continue
        image_builder.setSquareThumbnail(
            ImageVersion.Builder()
                # NOTE(review): height/width come from the caller's parameters,
                # not from the actual thumbnail — assumed to match; confirm.
                .setHeightPx(UnsignedInteger.valueOf(square_thumbnail_height_px))
                .setUrl(Url.parse(file_urls.square_thumbnail))
                .setWidthPx(UnsignedInteger.valueOf(square_thumbnail_width_px))
                .build()
        )
        images.append(image_builder.build())
    if len(images) > 0:
        object_builder.setImages(ImmutableList.copyOf(images))
    else:
        self._logger.warn("Omeka item %d has no valid images", omeka_item.id)
    object_ = object_builder.build()
    # The final object id is derived from the URL-quoted feature value, not the
    # Omeka item id computed at the top of this method.
    object_id = ObjectId.parse(str(collection_id) + '/' + urllib.quote(feature_value, ''))
    return ObjectEntry(object_id, object_)
def create(cls, form, domainsForSlots):
    """Build a CartesianSentenceFormDomain for *form* from per-slot domains.

    generated source for method create
    """
    slot_domains = Lists.transform(domainsForSlots, Function())
    frozen_domains = ImmutableList.copyOf(slot_domains)
    return CartesianSentenceFormDomain(form, frozen_domains)
def cleanUpIrrelevantRules(cls, expandedRules):
    """Filter *expandedRules* down to the relevant ones, as an ImmutableList.

    generated source for method cleanUpIrrelevantRules
    """
    # NOTE(review): `model` is never read here — a translation artifact from
    # the Java original. The factory call is kept in case it has side effects.
    model = SentenceFormModelFactory.create(expandedRules)
    relevant_rules = Collections2.filter(expandedRules, Predicate())
    return ImmutableList.copyOf(relevant_rules)