def index(self, pkg, data=None):
    '''Index a package of records.

    :param pkg: dict describing the upload; keys read here are "fileobj",
        "format", "collection" and "source" -- assumes the same shape that
        `upload` builds (TODO confirm against other callers).
    :param data: optional pre-parsed list of record dicts.  When None the
        records are parsed from pkg["fileobj"] in pkg["format"].  This
        keeps the one-argument `index(pkg)` call compatible while also
        supporting `index(collection, record_dicts)` as used by `upload`.
    :return: the result of the bulk upsert, or the string "DUPLICATE"
        when `can_index` refuses the package.
    '''
    if data is None:
        parser = Parser()
        data = parser.parse(pkg["fileobj"], pkg["format"])

    # prepare the data as required
    data, pkg = self.prepare(data, pkg)

    # only index if allowed to (matching source or email)
    if not self.can_index(pkg):
        return "DUPLICATE"

    # delete any old versions
    # should change this to do checks first, and save new ones, perhaps
    try:
        if "collection" in pkg:
            bibserver.dao.Record.delete_by_query("collection.exact:" + pkg["collection"])
        if "source" in pkg:
            res = bibserver.dao.Record.query(q='source:"' + pkg["source"] + '" AND type:"collection"')
            if res["hits"]["total"] != 0:
                coll = res["hits"]["hits"][0]["_source"]["collection"]
            else:
                coll = ""
            if coll != pkg.get("collection", None):
                bibserver.dao.Record.delete_by_query("collection.exact:" + coll)
    except Exception:
        # best-effort cleanup of stale versions: a failed delete must not
        # block indexing of the new records (deliberate swallow, but no
        # longer catches SystemExit/KeyboardInterrupt)
        pass

    # send the data list for bulk upsert
    return bibserver.dao.Record.bulk_upsert(data)
def upload(self, fileobj, format_, collection=None):
    '''Import a collection into the database.

    :param fileobj: a fileobj pointing to file from which to import
        collection records (and possibly collection metadata)
    :param format_: format of the fileobj (e.g. bibtex)
    :param collection: collection dict for use when creating collection.
        If undefined collection must be extractable from the fileobj.
    :return: same as `index` method.
    '''
    parser = Parser()
    record_dicts, metadata = parser.parse(fileobj, format=format_)

    # TODO: check authz for write to this collection

    # if metadata provided from file, roll it into the collection object;
    # values passed in `collection` override those parsed from the file
    if metadata:
        if collection:
            # guard: metadata.update(None) would raise TypeError when the
            # caller relies on the default collection=None
            metadata.update(collection)
        collection = metadata

    return self.index(collection, record_dicts)
def parse():
    '''Fetch a remote source document, parse it into collection records
    plus metadata, and return the whole thing as a JSON response.

    Request parameters:
      format     -- format of the source document (e.g. bibtex, json, csv)
      source     -- URL of the document to retrieve (http:// is prepended
                    if no scheme is given)
      collection -- optional collection label; when present it is slugified
                    into an id and stamped onto every parsed record
    '''
    # TODO: acceptable formats should be derived by some sort of introspection
    # from the parser.py based on what parsers are actually available.
    if 'format' not in request.values or 'source' not in request.values:
        if 'format' not in request.values and 'source' not in request.values:
            resp = make_response( '{"error": "Parser cannot run without source URL parameter and source format parameter", "acceptable_formats": ["bibtex","json","csv"]}' )
        elif 'format' not in request.values:
            resp = make_response( '{"error": "Parser cannot run without source format parameter", "acceptable_formats": ["bibtex","json","csv"]}' )
        else:
            resp = make_response( '{"error": "Parser cannot run without source URL parameter"}' )
        resp.mimetype = "application/json"
        return resp

    # request values arrive quoted from some clients, hence the strip('"')
    fmt = request.values.get("format").strip('"')
    source = request.values.get("source").strip('"')

    try:
        if not source.startswith('http://') and not source.startswith('https://'):
            source = 'http://' + source
        source = urllib2.unquote(source)
        fileobj = urllib2.urlopen(source)
    except Exception:
        # json.dumps escapes any quotes/backslashes in the URL, so the
        # error payload stays valid JSON (string concatenation did not)
        resp = make_response(
            json.dumps({"error": "Retrieval of file from source " + source + " failed"})
        )
        resp.mimetype = "application/json"
        return resp

    parser = Parser()
    newcoll = {}
    newcoll['records'], newcoll['metadata'] = parser.parse(fileobj, format=fmt)
    newcoll['metadata']['source'] = source
    newcoll['metadata']['created'] = datetime.now().isoformat()

    if request.values.get('collection', None):
        collection = request.values['collection'].strip('"')
        newcoll['metadata']['label'] = collection
        newcoll['metadata']['id'] = util.slugify(collection)
        for record in newcoll['records']:
            record['collection'] = newcoll['metadata']['id']

    resp = make_response( json.dumps(newcoll, sort_keys=True, indent=4) )
    resp.mimetype = "application/json"
    return resp