def get_info(pid, words=None, extra_previews=True):
    # Serialize a PIL image to a base64-encoded JPEG string.
    def b64img(img):
        data = io.BytesIO()
        img.save(data, format='JPEG', quality=85)
        return base64.b64encode(data.getvalue()).decode()

    mh = MediaHaven()
    alto = mh.get_alto(pid)
    result = dict(
        pid=pid,
        words=len(words) if words is not None else 0,
        alto=alto.search_words(words),
        alto_link=alto.url
    )
    result['ocr_text'] = alto.text
    with mh.get_preview(pid) as im:
        result['previewImageUrl'] = im.meta['previewImagePath']
        result['meta'] = im.meta
        if result['words'] > 0 and extra_previews:
            # Cropped and full-page previews with the search words highlighted.
            result['preview_full'] = b64img(im.highlight_words(words, crop=False))
            result['preview'] = b64img(im.highlight_words(words))
        result['props'] = im.meta['mdProperties']
    return result
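# Usage sketch (not from the original source): the pid and search words below
# are hypothetical placeholders; the call assumes the module-level imports of
# io, base64 and MediaHaven that get_info() relies on.
info = get_info('example_pid_19180801_0001', words=['Jan', 'Janssens'])
print(info['alto_link'])   # link to the ALTO OCR document
print(info['words'])       # number of search words passed in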
class Importer:
    def __init__(self):
        self._solr = Solr(Config(section='solr')['url'])
        self._mh = MediaHaven(buffer_size=100)

    def add(self, item):
        self._solr.add([item])

    def process(self, item):
        if item is None:
            raise Exception("Invalid item passed (None)")

        # Accept either a MediaHaven item dict or a plain pid string.
        if type(item) is not str:
            pid = item['externalId']
        else:
            pid = item
            item = self._mh.one('+(externalId:%s)' % pid)
        if not pid:
            raise Exception("No pid for item %s" % (item,))

        language = ''
        try:
            language = item['mdProperties']['language'][0].lower()
        except Exception as e:
            logger.warning('no language found for %s', pid)
            logger.exception(e)

        alto = self._mh.get_alto(item)
        if not alto:
            logger.debug("no alto for pid '%s'" % (pid,))
            text = ''
        else:
            text = Conversions.normalize(alto.text)

        self.add(dict(id=pid, text=text, language=language))
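# Usage sketch (not part of the original source): process() accepts either a
# pid string or an already-fetched MediaHaven item dict; the pid below is a
# made-up placeholder.
importer = Importer()
importer.process('example_pid_19180801_0001')   # pid string: item is looked up via MediaHaven
# importer.process(mh_item)                     # or pass an item dict that has an 'externalId'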
def oai(self):
    mh = MediaHaven(self._config)
    fragment_id = 'EeZNC2b9TeYMKMQRNcVJnumk'
    try:
        mh.oai().GetRecord(identifier='umid:%s' % fragment_id, metadataPrefix='mets')
    except IdDoesNotExist:
        pass
def _stats_pcts(self):
    mh = MediaHaven()
    nl_count = len(Datasources['namenlijst']['func']())
    mh_count = len(mh.search('+(workflow:GMS) +(archiveStatus:on_tape)'))

    # Maps a row label to the SELECT expression to count, optionally paired
    # with the total to compute percentages against.
    data = OrderedDict({
        '': 'COUNT(*)',
        'names from IFFM namenlijst': ('COUNT(DISTINCT nmlid)', nl_count),
        'newspaper pages': ('COUNT(DISTINCT pid)', mh_count),
    })

    for k, v in data.items():
        total = None
        if type(v) is tuple:
            total = v[1]
            v = v[0]
        args = (v, self.table, self.model.SKIP)
        res = self.db.execute('SELECT %s FROM %s WHERE status != %d' % args)
        matches = int(res.scalar())
        res = self.db.execute(
            'SELECT %s FROM %s WHERE status != %d and score > 0' % args)
        matches_with_score = int(res.scalar())
        counts = [
            matches,
            matches_with_score,
            matches_with_score / matches,
        ]
        if total is not None:
            counts.append(total)
            counts.append(matches / total)
            counts.append(matches_with_score / total)
        data[k] = counts

    return data
if not args.profile:
    logging.getLogger('pythonmodules.profiling').setLevel(logging.ERROR)

samples = Samples(GMB())

if args.train:
    # Train a new chunker on the GMB samples and pickle it for later runs.
    with timeit('Creating NamedEntityChunker'):
        chunker = NamedEntityChunker(samples.training())
    pickle.dump(chunker, open(args.pickle, 'wb'))
else:
    with timeit('Pickle load'):
        chunker = pickle.load(open(args.pickle, 'rb'))

if args.test_mediahaven:
    with timeit('NER Tagging'):
        from pythonmodules.mediahaven import MediaHaven
        # from pythonmodules.config import Config
        mh = MediaHaven()
        item = mh.one('+(workflow:GMS) +(archiveStatus:on_tape)')
        print(chunker.parse(pos_tag(word_tokenize(item['description']))))

if args.test:
    with timeit('Testing accuracy'):
        testsamples = samples.test(args.test)
        to_evaluate = (conlltags2tree([(w, t, iob) for (w, t), iob in iobs])
                       for iobs in tqdm(testsamples, total=args.test))
        score = chunker.evaluate(to_evaluate)
    print("Test accuracy = %.2f%% (tested using %d samples)" %
          (score.accuracy() * 100, int(args.test)))
def mediahaven(self):
    mh = MediaHaven(self._config)
    mh.one('+(externalId:f76639mh4g_19180801_0001)')
logger.propagate = True
config = Config()
table_name = config['db']['table_name']

parser = ArgumentParser(description='Use NER to look for entities and save')
parser.add_argument('--debug', action='store_true', help='Show debug info')
parser.add_argument('--test-connection', action='store_true',
                    help='Just do a connection test to MediaHaven')
parser.add_argument('--clear', action='store_true', help='Clear the table before inserting')
parser.add_argument('--continue', action='store_true', help='Continue from last inserted row')
parser.add_argument('--continue-from', help='Continue from row CONTINUE_FROM')
parser.add_argument('--table', help='The table to store the results in, default: %s' % table_name)
args = parser.parse_args()

mh = MediaHaven(config)
clear_db = args.clear

if args.test_connection:
    # Only verify that we can authenticate and fetch a single item, then stop.
    mh.refresh_token()
    print(type(mh.one()) is dict)
    exit()

db = create_engine(config['db']['connection_url'])
db.connect()
meta = MetaData(db, reflect=True)

if args.table:
    table_name = args.table

try:
    table = meta.tables[table_name]
def __init__(self, config=None, force_regen=False):
    self.nl = Namenlijst(config)
    self.mh = MediaHaven(config)
    self.force_regen = force_regen