Example #1
0
def index(ticket):
    """Load a ticket's parsed JSON from the download cache into the index.

    Marks the ticket ``populating_index``, verifies that the cached file named
    by ``ticket['data_json']`` exists and is non-empty, hands it to an
    ``Importer`` created for the ticket's owner, and finally marks the ticket
    ``done``.

    :param ticket: mapping-like ticket object that also provides ``save()``
        and ``fail(message)``. Keys read here: ``data_json``, ``owner``,
        ``collection``, ``source_url``, ``format`` and, optionally,
        ``description`` and ``license``.
    :raises Exception: if the cached file exists but contains no data.
    """
    ticket['state'] = 'populating_index'
    ticket.save()
    # Make sure the parsed content is in the cache
    download_cache_directory = config['download_cache_directory']
    in_path = os.path.join(download_cache_directory, ticket['data_json'])
    if not os.path.exists(in_path):
        ticket.fail('Parsed content for %s not found' % in_path)
        return
    # Probe a single character instead of loading the whole file just to test
    # for emptiness; the context manager guarantees the handle is closed.
    with open(in_path) as parsed:
        if not parsed.read(1):
            raise Exception('The parsed data in this ticket is empty.')

    # TODO check for metadata section to update collection from this?
    owner = bibserver.dao.Account.get(ticket['owner'])
    importer = Importer(owner=owner)
    collection = {
        'label': ticket['collection'],
        'collection': util.slugify(ticket['collection']),
        'description': ticket.get('description'),
        'source': ticket['source_url'],
        'format': ticket['format'],
        'license': ticket.get('license', u"Not specified"),
    }
    # NOTE(review): assumes upload() consumes the file before returning, so
    # closing the handle straight afterwards is safe -- confirm against
    # Importer.upload.
    with open(in_path) as parsed:
        importer.upload(parsed, collection)
    ticket['state'] = 'done'
    ticket.save()
Example #2
0
def index(ticket):
    """Push the cached, already-parsed data of ``ticket`` into the index.

    The ticket is moved to state ``populating_index``; the file named by
    ``ticket['data_json']`` inside the configured download cache directory is
    checked for existence and content and then uploaded through an
    ``Importer`` created for the ticket's owner. On success the ticket is
    moved to state ``done``.

    :param ticket: mapping-like ticket object that also provides ``save()``
        and ``fail(message)``. Keys read here: ``data_json``, ``owner``,
        ``collection``, ``source_url``, ``format`` and, optionally,
        ``description`` and ``license``.
    :raises Exception: if the cached file exists but contains no data.
    """
    ticket['state'] = 'populating_index'
    ticket.save()
    # Make sure the parsed content is in the cache
    download_cache_directory = config['download_cache_directory']
    in_path = os.path.join(download_cache_directory, ticket['data_json'])
    if not os.path.exists(in_path):
        ticket.fail('Parsed content for %s not found' % in_path)
        return

    # One managed handle serves both the emptiness probe and the upload, so
    # nothing is left open if any step below raises.
    with open(in_path) as parsed:
        if not parsed.read(1):
            raise Exception('The parsed data in this ticket is empty.')
        # Rewind so the importer sees the file from its first byte.
        parsed.seek(0)

        # TODO check for metadata section to update collection from this?
        owner = bibserver.dao.Account.get(ticket['owner'])
        importer = Importer(owner=owner)
        collection = {
            'label': ticket['collection'],
            'collection': util.slugify(ticket['collection']),
            'description': ticket.get('description'),
            'source': ticket['source_url'],
            'format': ticket['format'],
            'license': ticket.get('license', u"Not specified"),
        }
        # NOTE(review): assumes upload() finishes reading before it returns --
        # confirm against Importer.upload.
        importer.upload(parsed, collection)
    ticket['state'] = 'done'
    ticket.save()
Example #3
0
    def index(self, collection_dict, record_dicts, metadata):
        '''Add this collection and its records to the database index.

        Builds a Collection from ``collection_dict`` overlaid with
        ``metadata``, reconciles it with the owner's existing collections,
        requests deletion of the records indexed under the affected
        collection id, saves the collection, and bulk-upserts
        ``record_dicts`` after tagging each record with the collection id
        and (when a base URL is known) a ``url``.

        :param collection_dict: keyword arguments passed to
            ``bibserver.dao.Collection``; the result must contain ``label``.
        :param record_dicts: iterable of record mappings; ``len()`` is taken
            of it and every record is modified in place.
        :param metadata: mapping whose items are copied onto the collection;
            ignored when falsy.
        :return: (collection, records) tuple of collection and associated
        record objects.
        :raises AssertionError: if the collection ends up without a ``label``
            (only while assertions are enabled).
        '''
        collection = bibserver.dao.Collection(**collection_dict)
        if metadata:
            # NOTE(review): iteritems() exists only on Python 2 mappings.
            for key,val in metadata.iteritems():
                collection[key] = val
        # One timestamp (naive local time, ISO formatted) is used for both
        # 'created' and 'modified'.
        timestamp = datetime.now().isoformat()
        collection['created'] = timestamp
        assert 'label' in collection, 'Collection must have a label'
        if not 'id' in collection:
            # No explicit id supplied: derive one from the label.
            collection['id'] = util.slugify(collection['label'])
        collection['owner'] = self.owner.id

        # delid is the collection id whose records are deleted below. It
        # defaults to the new collection's id and is switched to an older id
        # when an existing collection with the same source is superseded.
        delid = collection['id']
        for coll in self.owner.collections:
            if 'source' in coll and 'source' in collection:
                if coll['source'] == collection['source']:
                    if coll['id'] != collection['id']:
                        # Same source under a different id: drop the old
                        # collection and target its records for deletion.
                        delid = coll['id']
                        bibserver.dao.Collection.delete_by_query('id:' + coll['id'])
                        break
                    else:
                        # Same source and same id: continue with the existing
                        # object, so the 'created' value set above on the
                        # freshly built collection is not carried over.
                        collection = coll
                        break
            if coll['id'] == collection['id']:
                # Id matches without a source match: reuse the existing
                # collection object.
                collection = coll
                break

        # The field name is 'collection' followed by the configured facet
        # suffix; the id is wrapped in double quotes inside the query.
        bibserver.dao.Record.delete_by_query('collection'+config["facet_field"]+':"' + delid + '"')

        collection['records'] = len(record_dicts)
        collection['modified'] = timestamp
        collection.save()

        for rec in record_dicts:
            if 'collection' in rec:
                # NOTE(review): append() presumes rec['collection'] is a list;
                # a plain string here would fail -- verify against the parsers.
                if collection["id"] not in rec["collection"]:
                    rec['collection'].append(collection["id"])
            else:
                rec['collection'] = [collection["id"]]
            # Fall back to the configured SITE_URL when no request URL has
            # been set on the importer.
            if not self.requesturl and 'SITE_URL' in config:
                self.requesturl = str(config['SITE_URL'])
            if self.requesturl:
                if not self.requesturl.endswith('/'):
                    self.requesturl += '/'
                # The record URL is <base>record/ followed by
                # <collection id>/<citekey>, else the record id, else a newly
                # generated uuid hex that is also stored as the record id.
                # Records get no 'url' (and no generated id) when no base URL
                # is available.
                rec['url'] = self.requesturl + 'record/'
                if 'citekey' in rec:
                    rec['url'] += collection['id'] + '/' + rec.get('citekey')
                elif 'id' in rec:
                    rec['url'] += rec['id']
                else:
                    rec['id'] = uuid.uuid4().hex
                    rec['url'] += rec['id']
        records = bibserver.dao.Record.bulk_upsert(record_dicts)
        return collection, records
Example #4
0
    def index(self, collection_dict, record_dicts):
        '''Index a collection and its records on behalf of ``self.owner``.

        The collection built from ``collection_dict`` is reconciled with the
        owner's existing collections: one with the same slug is reused, while
        one that shares the source under a different slug is deleted. Records
        stored under the affected slug for this owner are then deleted, the
        collection is saved, and every record is stamped with owner,
        collection and (when a base URL is known) ``url`` before the bulk
        upsert.

        :return: (collection, records) tuple of collection and associated
        record objects.
        '''
        collection = bibserver.dao.Collection(**collection_dict)
        assert 'label' in collection, 'Collection must have a label'
        if 'collection' not in collection:
            collection['collection'] = util.slugify(collection['label'])
        collection['owner'] = self.owner.id

        # Slug whose records are wiped before re-indexing.
        stale_slug = collection['collection']
        for existing in self.owner.collections:
            shares_source = (
                'source' in existing
                and 'source' in collection
                and existing['source'] == collection['source']
            )
            if existing['collection'] == collection['collection']:
                # Same slug (with or without the same source): reuse it.
                collection = existing
                break
            if shares_source:
                # Same source under another slug: the old collection goes.
                stale_slug = existing['collection']
                obsolete_query = 'collection:"' + existing['collection'] + '" AND owner:"' + collection['owner'] + '"'
                bibserver.dao.Collection.delete_by_query(obsolete_query)
                break

        facet = config["facet_field"]
        wipe_query = 'collection' + facet + ':"' + stale_slug + '" AND owner' + facet + ':"' + collection['owner'] + '"'
        bibserver.dao.Record.delete_by_query(wipe_query)

        collection.save()

        for rec in record_dicts:
            rec['owner'] = collection['owner']
            if 'collection' not in rec or collection['collection'] != rec['collection']:
                rec['collection'] = collection['collection']
            if not self.requesturl and 'SITE_URL' in config:
                self.requesturl = str(config['SITE_URL'])
            if not self.requesturl:
                # No base URL available: this record gets no 'url'.
                continue
            if not self.requesturl.endswith('/'):
                self.requesturl += '/'
            rec['url'] = self.requesturl + collection['owner'] + '/' + collection['collection'] + '/'
            if 'cid' in rec:
                tail = rec['cid']
            elif 'id' in rec:
                tail = rec['id']
            else:
                # Neither cid nor id: mint an id and keep it on the record.
                tail = rec['id'] = uuid.uuid4().hex
            rec['url'] += tail
        records = bibserver.dao.Record.bulk_upsert(record_dicts)
        return collection, records
Example #5
0
 def test_02_collection(self):
     # Upsert a collection for the fixture account, then check that it got an
     # id, kept its label, and is the collection listed first for the account.
     title = u'My Collection'
     payload = dict(
         label=title,
         slug=util.slugify(title),
         owner=Fixtures.account.id,
     )
     saved = dao.Collection.upsert(payload)
     assert saved.id, saved
     assert saved['label'] == title
     # should only be one collection for this account so this is ok
     owned = Fixtures.account.collections
     assert saved.id == owned[0].id, owned
Example #6
0
 def test_02_collection(self):
     # A collection upserted for the fixture account must come back with an
     # id and its label, and must match the account's first collection.
     expected_label = u'My Collection'
     stored = dao.Collection.upsert({
         'label': expected_label,
         'slug': util.slugify(expected_label),
         'owner': Fixtures.account.id
     })
     assert stored.id, stored
     assert stored['label'] == expected_label
     # should only be one collection for this account so this is ok
     account_collections = Fixtures.account.collections
     first_owned = account_collections[0]
     assert stored.id == first_owned.id, account_collections
Example #7
0
def parse():
    """Fetch a remote bibliography file, parse it and return the result as JSON.

    Reads ``format`` and ``source`` (both required) and ``collection``
    (optional) from ``request.values``. Surrounding double quotes are stripped
    from each value, and ``http://`` is prepended to a source that has no
    http/https scheme. When ``collection`` is supplied, its value becomes the
    metadata ``label``, its slug becomes the metadata ``id``, and every record
    is tagged with that id.

    :return: a response with mimetype ``application/json`` containing either
        the parsed ``records`` and ``metadata`` or an object with an
        ``error`` message.
    """
    # TODO: acceptable formats should be derived by some sort of introspection
    # from the parser.py based on what parsers are actually available.
    missing_format = 'format' not in request.values
    missing_source = 'source' not in request.values
    if missing_format or missing_source:
        if missing_format and missing_source:
            resp = make_response( '{"error": "Parser cannot run without source URL parameter and source format parameter", "acceptable_formats": ["bibtex","json","csv"]}' )
        elif missing_format:
            resp = make_response( '{"error": "Parser cannot run without source format parameter", "acceptable_formats": ["bibtex","json","csv"]}' )
        else:
            resp = make_response( '{"error": "Parser cannot run without source URL parameter"}')
        resp.mimetype = "application/json"
        return resp

    # Named source_format so the builtin format() is not shadowed.
    source_format = request.values.get("format").strip('"')
    source = request.values.get("source").strip('"')

    # NOTE(review): this fetches a caller-supplied URL from the server side;
    # consider restricting the hosts that may be reached.
    try:
        if not source.startswith('http://') and not source.startswith('https://'):
            source = 'http://' + source
        source = urllib2.unquote(source)
        fileobj = urllib2.urlopen(source)
    except Exception:
        # Serialise through json.dumps so that quotes or backslashes in the
        # caller-supplied source cannot break or inject into the JSON body.
        # Catching Exception (not a bare except) lets SystemExit and
        # KeyboardInterrupt propagate.
        resp = make_response( json.dumps({"error": "Retrieval of file from source " + source + " failed"}) )
        resp.mimetype = "application/json"
        return resp

    parser = Parser()
    newcoll = {}
    try:
        newcoll['records'], newcoll['metadata'] = parser.parse(fileobj, format=source_format)
    finally:
        # Release the connection whether or not parsing succeeded.
        fileobj.close()
    newcoll['metadata']['source'] = source
    timestamp = datetime.now().isoformat()
    newcoll['metadata']['created'] = timestamp
    if request.values.get('collection',None):
        collection = request.values['collection'].strip('"')
        newcoll['metadata']['label'] = collection
        newcoll['metadata']['id'] = util.slugify(collection)
        for record in newcoll['records']:
            record['collection'] = newcoll['metadata']['id']
    resp = make_response( json.dumps(newcoll, sort_keys=True, indent=4) )
    resp.mimetype = "application/json"
    return resp
Example #8
0
    def index(self, collection_dict, record_dicts):
        """Save the collection and bulk-upsert its records.

        The owner's collection whose slug matches the slugified label is
        reused when one is found; otherwise a new one is built from
        ``collection_dict``. Each record that is exactly a ``dict`` is stamped
        with the collection's owner and slug and, when a base URL is known,
        with ``_id`` and ``url``.

        :return: (collection, records) tuple of collection and associated
        record objects.
        """
        slug = util.slugify(collection_dict["label"])
        collection = bibserver.dao.Collection.get_by_owner_coll(self.owner.id, slug)
        if not collection:
            collection = bibserver.dao.Collection(**collection_dict)
            assert "label" in collection, "Collection must have a label"
            if "collection" not in collection:
                collection["collection"] = slug
            collection["owner"] = self.owner.id

        collection.save()

        for rec in record_dicts:
            # Entries of any other type are left untouched.
            if type(rec) is not dict:
                continue
            rec["owner"] = collection["owner"]
            if "collection" not in rec or collection["collection"] != rec["collection"]:
                rec["collection"] = collection["collection"]
            if not self.requesturl and "SITE_URL" in config:
                self.requesturl = str(config["SITE_URL"])
            if not self.requesturl:
                # No base URL available: neither _id nor url is added.
                continue
            if not self.requesturl.endswith("/"):
                self.requesturl += "/"
            # The _id is generated before 'url' is added to the record.
            if "_id" not in rec:
                rec["_id"] = bibserver.dao.make_id(rec)
            rec["url"] = self.requesturl + collection["owner"] + "/" + collection["collection"] + "/"
            # _id is guaranteed at this point, so it serves as the fallback.
            id_key = "id" if "id" in rec else "_id"
            rec["url"] += rec[id_key]
        bibserver.dao.Record.bulk_upsert(record_dicts)
        return collection, record_dicts
Example #9
0
    def index(self, collection_dict, record_dicts):
        '''Store a collection and its records in the database index.

        Looks up the owner's collection by the slug of the given label and
        creates it from ``collection_dict`` when the lookup comes back empty.
        Records that are exactly of type ``dict`` receive the collection's
        owner and slug, plus ``_id`` and ``url`` whenever a base URL is
        available; the record list is then bulk-upserted.

        :return: (collection, records) tuple of collection and associated
        record objects.
        '''
        label_slug = util.slugify(collection_dict['label'])
        found = bibserver.dao.Collection.get_by_owner_coll(self.owner.id, label_slug)
        if found:
            collection = found
        else:
            collection = bibserver.dao.Collection(**collection_dict)
            assert 'label' in collection, 'Collection must have a label'
            if 'collection' not in collection:
                collection['collection'] = label_slug
            collection['owner'] = self.owner.id

        collection.save()

        for rec in record_dicts:
            if type(rec) is dict:
                rec['owner'] = collection['owner']
                already_tagged = (
                    'collection' in rec
                    and collection['collection'] == rec['collection']
                )
                if not already_tagged:
                    rec['collection'] = collection['collection']
                # Use the configured site URL when no request URL is set.
                if not self.requesturl and 'SITE_URL' in config:
                    self.requesturl = str(config['SITE_URL'])
                if self.requesturl:
                    if not self.requesturl.endswith('/'):
                        self.requesturl += '/'
                    # make_id must see the record before 'url' is attached.
                    if '_id' not in rec:
                        rec['_id'] = bibserver.dao.make_id(rec)
                    rec['url'] = self.requesturl + collection['owner'] + '/' + collection['collection'] + '/'
                    # Prefer the record's own id; _id always exists here.
                    rec['url'] += rec['id'] if 'id' in rec else rec['_id']
        bibserver.dao.Record.bulk_upsert(record_dicts)
        return collection, record_dicts