Example #1
0
 def run(self):
     """
     Write the pairwise overlap of all input sets as TSV.

     For every unordered pair of input keys (k1, k2), each value found in
     both sets is written as one row ``k1, k2, value``; values are emitted
     in sorted order within a pair.
     """
     inputs = self.input()
     keys = list(inputs.keys())

     # Load each target exactly once. Loading inside the pair loop would
     # re-read and re-parse every input once per pair it takes part in.
     loaded = {key: load_set_from_target(inputs.get(key)) for key in keys}

     with self.output().open('w') as output:
         for k1, k2 in itertools.combinations(keys, 2):
             for issn in sorted(loaded[k1].intersection(loaded[k2])):
                 output.write_tsv(k1, k2, issn)
Example #2
0
File: adhoc.py Project: miku/siskin
    def run(self):
        """
        Compare the 'amsl' and 'crossref' input sets and write a JSON summary.

        The document holds both full sets, the members exclusive to each
        side, and the members they share; sets are serialized via SetEncoder.
        """
        amsl, crossref = (
            load_set_from_target(self.input().get(name))
            for name in ('amsl', 'crossref')
        )

        with self.output().open('w') as output:
            stats = {'amsl': amsl, 'crossref': crossref}
            stats['amsl_only'] = amsl - crossref
            stats['crossref_only'] = crossref - amsl
            stats['both'] = amsl & crossref
            output.write(json.dumps(stats, cls=SetEncoder))
Example #3
0
File: adhoc.py Project: zazi/siskin
    def run(self):
        """Write a JSON report on how the 'amsl' and 'crossref' inputs overlap.

        Keys of the report: 'amsl' and 'crossref' (the full sets),
        'amsl_only' and 'crossref_only' (set differences) and 'both'
        (intersection). SetEncoder is used to serialize the sets.
        """
        def load(name):
            # Resolve the named input target and read it into a set.
            return load_set_from_target(self.input().get(name))

        amsl = load('amsl')
        crossref = load('crossref')

        with self.output().open('w') as output:
            report = dict(
                amsl=amsl,
                crossref=crossref,
                amsl_only=amsl - crossref,
                crossref_only=crossref - amsl,
                both=amsl & crossref,
            )
            serialized = json.dumps(report, cls=SetEncoder)
            output.write(serialized)
Example #4
0
 def run(self):
     """
     Write the first two columns of every mapping row whose prefix was seen.

     Rows whose prefix is not in the 'seen' input are logged at debug level
     and dropped. Each remaining two-column combination is written only
     once, in order of first appearance.
     """
     seen = load_set_from_target(self.input().get('seen'))
     emitted = set()
     with self.input().get('mapping').open() as handle, self.output().open('w') as output:
         for row in handle.iter_tsv(cols=('prefix', 'name', 'current')):
             if row.prefix in seen:
                 pair = tuple(row[:2])
                 if pair in emitted:
                     # This combination has already been written.
                     continue
                 output.write_tsv(*pair)
                 emitted.add(pair)
             else:
                 self.logger.debug("not seen: %s", row.prefix)
Example #5
0
File: doaj.py Project: miku/siskin
    def run(self):
        """
        Copy the 'dump' input line by line, leaving out unwanted records.

        A record is left out if its id is in the 'blacklist' input, or if
        any of its journal ISSNs (hyphens removed, whitespace stripped) is
        in the set loaded from the 028_doaj_filter.tsv asset. Every other
        line is written to the output unchanged.
        """
        def drop_hyphens(line):
            # Applied to each line of the filter file while loading it.
            return line.replace("-", "")

        identifier_blacklist = load_set_from_target(self.input().get('blacklist'))
        excludes = load_set_from_file(self.assets('028_doaj_filter.tsv'), func=drop_hyphens)

        with self.output().open('w') as output, self.input().get('dump').open() as handle:
            for line in handle:
                record = json.loads(line)
                if record['id'] in identifier_blacklist:
                    continue
                issns = record["bibjson"]["journal"]["issns"]
                # Stops at the first excluded ISSN, like an explicit break.
                if any(issn.replace("-", "").strip() in excludes for issn in issns):
                    continue
                output.write(line)