def checkon(fn, o):
    """Harvest a proceedings title into the global (k2r, v2o) replacement pair,
    then push it into descendant entries that carry the k2r key.

    Returns 0 when nothing was written, 2 when the entry file was rewritten.
    """
    global k2r, v2o
    # entries live one-per-file; a directory stands for a volume
    if not os.path.exists(fn) or os.path.isdir(fn):
        fn += '.json'
    if not o.filename.startswith(d2r):
        return 0
    # if we find a proceedings volume, take its title to all descendant booktitles
    if o.json['type'] == 'proceedings':
        k2r = 'booktitle'
        v2o = o.json['title']
        return 0
    # just in case
    if o.json['type'] not in ('inproceedings', 'article'):
        return 0
    before = sorted(strictstrip(line) for line in o.getJSON().split('\n')[1:-1])
    if k2r not in o.json.keys():
        return 0
    o.json[k2r] = v2o
    after = sorted(strictstrip(line) for line in o.getJSON().split('\n')[1:-1])
    if before == after:
        return 0
    with open(fn, 'w', encoding='utf-8') as out:
        out.write(o.getJSON())
    return 2
def checkon(fn, o):
    """Overwrite the 'roles' list of a proceedings/book entry from the scraped
    `roles` table and rewrite the file if that changed anything.

    Returns 0 when the entry is skipped or unchanged, 2 when rewritten.
    """
    if not os.path.exists(fn) or os.path.isdir(fn):
        fn = fn + '.json'
    if o.get('type') not in ('proceedings', 'book'):
        # we don't go per paper
        return 0
    if o.getKey() not in roles.keys():
        # you know nothing, scraped CSV
        return 0
    plines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
    # if 'roles' not in o.json.keys():
    if True:
        # no prior knowledge of roles => we know everything
        o.json['roles'] = sorted(roles[o.getKey()], key=lambda x: x[1])
    else:
        # prior knowledge of roles => treat per case
        o.json['roles'] = sorted(
            o.json['roles'] + [r for r in roles[o.getKey()] if r not in o.json['roles']],
            key=lambda x: x[1])
    # BUG FIX: nlines used to be sorted with key=lambda x: x[1] (by the second
    # character of each line) while plines is sorted naturally, so the two
    # lists almost never compared equal and the file was rewritten on every
    # run. Sort both sides identically.
    nlines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
    # The next case should not happen, but could if we have trivial lists
    # if flines != plines:
    #     return 1
    if plines != nlines:
        # explicit UTF-8 for consistency with the sibling writers in this file
        f = open(fn, 'w', encoding='utf-8')
        f.write(o.getJSON())
        f.close()
        return 2
    else:
        return 0
def checkon(fn, o):
    """Replace value v2i of key k2r with v2o in entries located under d2r.

    Returns 0 when skipped or unchanged, 1 when the key is absent although a
    concrete old value v2i was expected, 2 when the file was rewritten.
    """
    if not os.path.exists(fn) or os.path.isdir(fn):
        fn = fn + '.json'
    # only proper descendants of d2r are in scope (when a scope is given)
    if d2r and (not o.filename.startswith(d2r) or o.filename == d2r):
        return 0
    plines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
    if k2r in o.json.keys():
        if o.json[k2r] == v2i:
            o.json[k2r] = v2o
        else:
            return 0
    else:
        if not v2i:
            # no old value expected: just set the new one
            o.json[k2r] = v2o
        else:
            return 1
    nlines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
    if plines != nlines:
        # BUG FIX: write as UTF-8 explicitly (the identical sibling variant
        # does); the locale default encoding can break on em-dashes etc.
        f = open(fn, 'w', encoding='utf-8')
        f.write(o.getJSON())
        f.close()
        return 2
    else:
        return 0
def checkon(fn, o):
    """Swap the value v2i of key k2r for v2o in entries that live under d2r.

    Returns 0 (skipped/unchanged), 1 (key missing but an old value was
    expected) or 2 (entry file rewritten).
    """
    if not os.path.exists(fn) or os.path.isdir(fn):
        fn += '.json'
    # restrict the sweep to proper descendants of d2r, when a scope is set
    if d2r and (o.filename == d2r or not o.filename.startswith(d2r)):
        return 0
    before = sorted(strictstrip(line) for line in o.getJSON().split('\n')[1:-1])
    if k2r in o.json.keys():
        if o.json[k2r] != v2i:
            return 0
        o.json[k2r] = v2o
    elif v2i:
        # a concrete old value was expected but the key is absent
        return 1
    else:
        o.json[k2r] = v2o
    after = sorted(strictstrip(line) for line in o.getJSON().split('\n')[1:-1])
    if before == after:
        return 0
    with open(fn, 'w', encoding='utf-8') as out:
        out.write(o.getJSON())
    return 2
def checkon(fn, o):
    """Rewrite one entry file in canonical form: tab-indented, sorted lines,
    comma after every line except the last.

    Returns 1 when disk and model disagree on content, 2 when only the
    formatting was fixed (file rewritten), 0 when nothing needed doing.
    """
    if not os.path.exists(fn) or os.path.isdir(fn):
        fn += '.json'
    with open(fn, 'r', encoding='utf-8') as f:
        flines = f.readlines()[1:-1]
    sflines = [strictstrip(line) for line in flines]
    sjlines = sorted(strictstrip(line) for line in o.getJSON().split('\n')[1:-1])
    jlines = ['\t{},\n'.format(line) for line in sjlines]
    # remove the last comma
    jlines[-1] = jlines[-1][:-2] + '\n'
    if sflines != sjlines:
        # the content differs, not merely the layout — report, do not touch
        return 1
    if jlines == flines:
        return 0
    with open(fn, 'w', encoding='utf-8') as f:
        f.write('{\n')
        f.write(''.join(jlines))
        f.write('}')
    return 2
def checkon(fn, o):
    """Verify one proceedings file against its model; create a minimal stub
    when missing and delete files with obviously bogus years.

    Returns 0 on match, 1 on mismatch or removal, 2 when a stub was created.
    """
    if not os.path.exists(fn) or os.path.isdir(fn):
        fn += '.json'
    if not os.path.exists(fn):
        # if it still does not exist, let us create a minimal one
        stub = '{{\n\t"title": "{name}",\n\t"type": "proceedings",\n\t"year": {year}\n}}'.format(
            name=lastSlash(fn)[:-5].replace('-', ' '),
            year=findYear(lastSlash(fn)))
        with open(fn, 'w', encoding='utf-8') as f:
            f.write(stub)
        print('[ {} ] {}'.format(C.yellow('MADE'), fn))
        return 2
    with open(fn, 'r', encoding='utf-8') as f:
        lines = f.readlines()[1:-1]
    for line in lines:
        # a year beyond 3000 can only be scraping garbage — drop the file
        if '"year"' in line and findYear(line) > 3000:
            os.remove(fn)
            print('[ {} ] {}'.format(C.red('KILL'), fn))
            return 1
    flines = sorted(strictstrip(line) for line in lines)
    plines = sorted(strictstrip(line) for line in o.getJSON().split('\n')[1:-1])
    if flines == plines:
        return 0
    only_on_disk = [line for line in flines if line not in plines]
    only_in_model = [line for line in plines if line not in flines]
    print('∆:', only_on_disk, '\nvs', only_in_model)
    return 1
def checkon(fn, o):
    """Rewrite one entry file in canonical form (tab-indented, sorted lines,
    no trailing comma on the last line).

    Returns 1 when contents disagree, 2 when only formatting was fixed,
    0 when the file is already canonical.
    """
    if not os.path.exists(fn) or os.path.isdir(fn):
        fn = fn + '.json'
    # BUG FIX: read/write explicitly as UTF-8 (the identical sibling variant
    # does); the locale default encoding can break on fancy quotes/dashes
    f = open(fn, 'r', encoding='utf-8')
    flines = f.readlines()[1:-1]
    f.close()
    sflines = [strictstrip(s) for s in flines]
    sjlines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
    jlines = ['\t{},\n'.format(s) for s in sjlines]
    jlines[-1] = jlines[-1][:-2] + '\n'  # remove the last comma
    if sflines != sjlines:
        # content differs, not merely the layout — report, do not touch
        return 1
    elif jlines != flines:
        # f1 = [s for s in jlines if s not in flines]
        # f2 = [s for s in flines if s not in jlines]
        # print('∆:', f1, '\nvs', f2)
        f = open(fn, 'w', encoding='utf-8')
        f.write('{\n')
        for line in jlines:
            f.write(line)
        f.write('}')
        f.close()
        return 2
    else:
        return 0
def checkon(fn, o):
    """Verify one proceedings file; create a minimal stub when missing and
    delete files carrying obviously bogus years.

    Returns 0 on match, 1 on mismatch or removal, 2 when a stub was created.
    """
    if not os.path.exists(fn) or os.path.isdir(fn):
        fn = fn + '.json'
    if not os.path.exists(fn):
        # if it still does not exist, let us create a minimal one
        # BUG FIX: open with explicit UTF-8 (as the sibling variant does);
        # titles may contain non-ASCII characters
        f = open(fn, 'w', encoding='utf-8')
        f.write('{{\n\t"title": "{name}",\n\t"type": "proceedings",\n\t"year": {year}\n}}'.format(
            name=fn.split('/')[-1][:-5].replace('-', ' '),
            year=findYear(fn.split('/')[-1])))
        f.close()
        print('[ {} ] {}'.format(C.yellow('MADE'), fn))
        return 2
    f = open(fn, 'r', encoding='utf-8')
    lines = f.readlines()[1:-1]
    f.close()
    for line in lines:
        # a year above 3000 can only be scraping garbage — drop the file
        if line.find('"year"') > -1 and findYear(line) > 3000:
            os.remove(fn)
            print('[ {} ] {}'.format(C.red('KILL'), fn))
            return 1
    flines = sorted([strictstrip(s) for s in lines])
    plines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
    if flines != plines:
        f1 = [line for line in flines if line not in plines]
        f2 = [line for line in plines if line not in flines]
        print('∆:', f1, '\nvs', f2)
    if flines == plines:
        return 0
    else:
        return 1
def checkon(fn, o):
    """Refresh the 'roles' list of a proceedings/book entry from the scraped
    `roles` table and rewrite the file if that changed anything.

    Returns 0 when the entry is skipped or unchanged, 2 when rewritten.
    """
    if not os.path.exists(fn) or os.path.isdir(fn):
        fn = fn + '.json'
    if o.get('type') not in ('proceedings', 'book'):
        # we don't go per paper
        return 0
    if o.getKey() not in roles.keys():
        # you know nothing, scraped CSV
        return 0
    plines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
    # if 'roles' not in o.json.keys():
    if True:
        # no prior knowledge of roles => we know everything
        o.json['roles'] = sorted(roles[o.getKey()], key=lambda x: x[1])
    else:
        # prior knowledge of roles => treat per case
        o.json['roles'] = sorted(
            o.json['roles'] + [r for r in roles[o.getKey()] if r not in o.json['roles']],
            key=lambda x: x[1])
    # BUG FIX: this list was previously sorted with key=lambda x: x[1]
    # (second character of each line) while plines uses the natural order,
    # so the comparison below almost always failed and the file was
    # rewritten on every run; sort both sides identically instead.
    nlines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
    # The next case should not happen, but could if we have trivial lists
    # if flines != plines:
    #     return 1
    if plines != nlines:
        f = open(fn, 'w', encoding='utf-8')
        f.write(o.getJSON())
        f.close()
        return 2
    else:
        return 0
def json2lines(j):
    """Flatten pretty-printed JSON lines into stripped key-value strings.

    Braces and blank lines are discarded; a continuation line (one that does
    not open with a double quote) is glued onto the preceding entry.
    """
    res = []
    for raw in j:
        stripped = strictstrip(raw)
        if stripped in ('', '{', '}'):
            # structural noise, skip it
            continue
        if stripped.startswith('"'):
            res.append(stripped)
        else:
            # a wrapped value: merge it into the entry it belongs to
            res[-1] += stripped
    return res
def checkon(fn, o):
    """Apply the global `renameto` mapping to the author/editor names of one
    entry and rewrite its file when anything changed.

    Returns 0 when unchanged, 2 when the entry file was rewritten.
    """
    if not os.path.exists(fn) or os.path.isdir(fn):
        fn += '.json'
    before = sorted(strictstrip(line) for line in o.getJSON().split('\n')[1:-1])
    for field in ('author', 'editor'):
        if field not in o.json.keys():
            continue
        names = o.json[field]
        if isinstance(names, str):
            # a single person stored as a bare string
            if names in renameto.keys():
                o.json[field] = renameto[names]
        else:
            # a list of names: rename each element in place
            for i, name in enumerate(names):
                if name in renameto.keys():
                    names[i] = renameto[name]
    after = sorted(strictstrip(line) for line in o.getJSON().split('\n')[1:-1])
    if before == after:
        return 0
    with open(fn, 'w', encoding='utf-8') as out:
        out.write(o.getJSON())
    return 2
def checkon(fn, o):
    """Apply the global `renameto` mapping to author/editor names of one entry
    and rewrite its file when anything changed.

    Returns 0 when unchanged, 2 when the entry file was rewritten.
    """
    if not os.path.exists(fn) or os.path.isdir(fn):
        fn = fn + '.json'
    plines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
    for ae in ('author', 'editor'):
        if ae in o.json.keys():
            if isinstance(o.json[ae], str):
                # a single name stored as a plain string
                if o.json[ae] in renameto.keys():
                    o.json[ae] = renameto[o.json[ae]]
            else:
                # a list of names: rename each element in place
                for i, x in enumerate(o.json[ae]):
                    if x in renameto.keys():
                        o.json[ae][i] = renameto[x]
    nlines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
    if plines != nlines:
        # BUG FIX: write explicitly as UTF-8 (the identical sibling variant
        # does); names routinely contain non-ASCII characters
        ff = open(fn, 'w', encoding='utf-8')
        ff.write(o.getJSON())
        ff.close()
        return 2
    else:
        return 0
def checkon(fn, o):
    """Normalise one bibliographic entry in place and rewrite its file if changed.

    Fixes numeric words in (book)titles, dashes, quote styles, possessives,
    contractions, page ranges, trivial lists and DBLP disambiguation suffixes.
    Returns 2 when the file was rewritten, 0 otherwise.
    """
    if not os.path.exists(fn) or os.path.isdir(fn):
        fn = fn + '.json'
    plines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
    for k in o.json.keys():
        if 'type' not in o.json.keys():
            print('TERRIBLE', o.getKey())
        if (o.json['type'] == 'proceedings' and k == 'title') or\
           (o.json['type'] == 'inproceedings' and k == 'booktitle'):
            # fix numbers
            for nr in nrs.keys():
                if o.json[k].find(' ' + nr + ' ') > -1:
                    o.json[k] = o.json[k].replace(' ' + nr + ' ', ' ' + nrs[nr] + ' ')
        if isinstance(o.json[k], str):
            # add emdashes for fancier titles
            if k in ('title', 'booktitle'):
                o.json[k] = o.json[k].replace(' - ', ' — ').replace(' -- ', ' — ')
            # Nice heuristic to run from time to time, but reports too much
            # on stuff like “eXtreme” and “jPET”
            # if o.json[k][0].islower():
            #     print('[ {} ] {}: {} {}'.format(C.red('LOOK'), o.getKey(), 'title is', o.get('title')))
            # normalised pages
            if k == 'pages':
                o.json[k] = o.json[k].replace('–', '-').replace('--', '-').replace('−', '-')
            # double spaces
            # BUG FIX: the test and the replacement used a single space (a
            # no-op) although the comment promises collapsing double spaces
            if o.json[k].find('  ') > -1:
                o.json[k] = o.json[k].replace('  ', ' ').strip()
            # find numeric values, turn them into proper integers
            if o.json[k].isdigit():
                o.json[k] = int(o.json[k])
                continue
            # remove confix curlies
            elif o.json[k].startswith('{') and o.json[k].endswith('}'):
                o.json[k] = o.json[k][1:-1]
            # single quotes to double quotes
            elif o.json[k].find(" '") > -1 and o.json[k].find("' ") > -1:
                o.json[k] = o.json[k].replace(" '", ' "').replace("' ", '" ')
            elif o.json[k].find(" '") > -1 and o.json[k].endswith("'"):
                o.json[k] = o.json[k].replace(" '", ' "').replace("'", '"')
            elif o.json[k].find("' ") > -1 and o.json[k].startswith("'"):
                o.json[k] = o.json[k].replace("' ", '" ').replace("'", '"')
            # fancify bland quotes
            elif o.json[k].find(' "') > -1 and o.json[k].find('" ') > -1:
                o.json[k] = o.json[k].replace(' "', ' “').replace('" ', '” ')
            elif o.json[k].find(' "') > -1 and o.json[k].endswith('"'):
                o.json[k] = o.json[k].replace(' "', ' “').replace('"', '”')
            elif o.json[k].find('" ') > -1 and o.json[k].startswith('"'):
                o.json[k] = o.json[k].replace('" ', '” ').replace('"', '“')
            # fancify LaTeX quotes
            elif o.json[k].find(' ``') > -1 and o.json[k].find("'' ") > -1:
                o.json[k] = o.json[k].replace("'' ", '” ').replace(' ``', ' “')
            elif o.json[k].find(' ``') > -1 and o.json[k].endswith("''"):
                o.json[k] = o.json[k].replace("''", '”').replace(' ``', ' “')
            elif o.json[k].find("'' ") > -1 and o.json[k].startswith('``'):
                o.json[k] = o.json[k].replace("'' ", '” ').replace('``', '“')
            elif o.json[k].startswith('``') and o.json[k].endswith("''"):
                o.json[k] = '“' + o.json[k][2:-2] + '”'
            # plural possessive
            elif o.json[k].find("'s") > -1:
                o.json[k] = o.json[k].replace("'s", '’s')
            elif o.json[k].find("s' ") > -1:
                o.json[k] = o.json[k].replace("s'", 's’')
            # contractions
            elif o.json[k].find("n't") > -1:
                o.json[k] = o.json[k].replace("n't", 'n’t')
            # the case of "Jr" vs "Jr."
            if k in ('author', 'editor') and o.json[k].endswith('Jr'):
                o.json[k] += '.'
            # TODO: report remaining suspicious activity
            for c in '`"\'':
                # ’ is ok
                if c in o.json[k] and k not in ('author', 'editor'):
                    print('[ {} ] {}: {} is “{}”'.format(
                        C.red('LOOK'), o.getKey(), k, o.json[k]))
                    lookat.append(o.filename)
        elif isinstance(o.json[k], list):
            # inline trivial lists
            if len(o.json[k]) == 1:
                o.json[k] = o.json[k][0]
            # inline hidden trivial lists
            if len(o.json[k]) == 2 and o.json[k][0] == o.json[k][1] \
               and k not in ('stemmed', 'tag', 'tagged'):
                o.json[k] = o.json[k][0]
            # unless it’s 'tagged'
            if k == 'tagged' and not isinstance(o.json[k][0], list):
                o.json[k] = [o.json[k]]
            # remove DBLP disambiguation: we might later regret it
            # but the information can be always re-retrieved
            if k in ('author', 'editor'):
                nas = []
                for a in o.json[k]:
                    # double spaces (same BUG FIX as above: collapse two
                    # spaces into one, not a no-op single-space replace)
                    if a.find('  ') > -1:
                        a = a.replace('  ', ' ').strip()
                    ws = a.split(' ')
                    if ws[-1].isdigit():
                        ws = ws[:-1]
                    nas.append(' '.join(ws))
                o.json[k] = nas
                # the case of "Jr" vs "Jr."
                o.json[k] = [a + '.' if a.endswith(' Jr') else a for a in o.json[k]]
    nlines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
    if plines != nlines:
        f = open(fn, 'w', encoding='utf-8')
        f.write(o.getJSON())
        f.close()
        return 2
    else:
        return 0
def checkon(fn, o):
    # Tag one entry: match its title against every tag definition, apply
    # relief rules, merge the result into o.tags and rewrite the file if
    # anything changed. Returns 0 (unchanged), 1 (disk/model mismatch) or
    # 2 (file rewritten).
    if os.path.isdir(fn):
        fn = fn + '.json'
    f = open(fn, 'r', encoding='utf-8')
    lines = f.readlines()[1:-1]
    f.close()
    flines = [strictstrip(s) for s in lines]
    plines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
    # NOTE(review): flines is unsorted while plines is sorted — this relies
    # on the on-disk file already being stored in sorted order; confirm
    # against the canonicalising pass elsewhere in this toolchain.
    if flines != plines:
        return 1
    ts = []
    # precise case-sensitive match
    mcs = o.get('title')
    # precise match for substrings
    mes = baretext(mcs)
    # precise match for words
    mew = mes.split(' ')
    # imprecise match for substrings
    mis = superbaretext(mes)
    # imprecise match for words
    miw = mis.split(' ')
    # now match!
    for t in tags:
        # print('Check',t,'vs',mes)
        if 'name' not in t.keys():
            print(C.red('ERROR:'), 'no name for tag from file', t['FILE'])
            continue
        if all([not k.startswith('match') for k in t.keys()]):
            print(C.red('ERROR:'), 'no match rules for tag', t['name'])
            continue
        for k in t.keys():
            if k == 'matchentry':
                # matchentry lists entry keys verbatim
                if o.getKey() in t[k]:
                    ts += [t['name']]
            elif k.startswith('match'):
                # dispatch on the match mode named by the key
                ts += [
                    t['name'] for s in listify(t[k])
                    if matchModes[k](s, mcs, mes, mew, mis, miw)
                ]
                # ts += [t['name'] for s in listify(t[k]) if fmm(t, k, s, mcs, mes, mew, mis, miw)]
    # second pass: check reliefs
    for t in tags:
        if 'relieves' in t.keys():
            for r in listify(t['relieves']):
                if t['name'] in ts and r in ts:
                    # the stronger tag displaces the relieved one; count it
                    ts.remove(r)
                    if t['name'] not in relieved.keys():
                        relieved[t['name']] = 0
                    relieved[t['name']] += 1
    if ts:
        if not o.tags:
            o.tags = []
        for t in ts:
            if t not in o.tags:
                o.tags.append(t)
        # uncomment the following one line to overwrite all tags
        o.tags = uniq(ts)
        # let’s keep tags clean and sorted
        o.tags = sorted(o.tags)
    nlines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
    if plines != nlines:
        f = open(fn, 'w', encoding='utf-8')
        f.write(o.getJSON())
        f.close()
        return 2
    else:
        return 0
def checkon(fn, o):
    """Normalise one bibliographic entry in place and rewrite its file if changed.

    Fixes numeric words in (book)titles, dashes, quote styles, possessives,
    contractions, page ranges, trivial lists and DBLP disambiguation suffixes.
    Returns 2 when the file was rewritten, 0 otherwise.
    """
    if not os.path.exists(fn) or os.path.isdir(fn):
        fn = fn + '.json'
    plines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
    for k in o.json.keys():
        if 'type' not in o.json.keys():
            print('TERRIBLE', o.getKey())
        if (o.json['type'] == 'proceedings' and k == 'title') or\
           (o.json['type'] == 'inproceedings' and k == 'booktitle'):
            # fix numbers
            for nr in nrs.keys():
                if o.json[k].find(' ' + nr + ' ') > -1:
                    o.json[k] = o.json[k].replace(' ' + nr + ' ', ' ' + nrs[nr] + ' ')
        if isinstance(o.json[k], str):
            # add emdashes for fancier titles
            if k in ('title', 'booktitle'):
                o.json[k] = o.json[k].replace(' - ', ' — ').replace(' -- ', ' — ')
            # Nice heuristic to run from time to time, but reports too much
            # on stuff like “eXtreme” and “jPET”
            # if o.json[k][0].islower():
            #     print('[ {} ] {}: {} {}'.format(C.red('LOOK'), o.getKey(), 'title is', o.get('title')))
            # normalised pages
            if k == 'pages':
                o.json[k] = o.json[k].replace('–', '-').replace('--', '-').replace('−', '-')
            # double spaces
            # BUG FIX: the test and the replacement used a single space (a
            # no-op) although the comment promises collapsing double spaces
            if o.json[k].find('  ') > -1:
                o.json[k] = o.json[k].replace('  ', ' ').strip()
            # find numeric values, turn them into proper integers
            if o.json[k].isdigit():
                o.json[k] = int(o.json[k])
                continue
            # remove confix curlies
            elif o.json[k].startswith('{') and o.json[k].endswith('}'):
                o.json[k] = o.json[k][1:-1]
            # single quotes to double quotes
            elif o.json[k].find(" '") > -1 and o.json[k].find("' ") > -1:
                o.json[k] = o.json[k].replace(" '", ' "').replace("' ", '" ')
            elif o.json[k].find(" '") > -1 and o.json[k].endswith("'"):
                o.json[k] = o.json[k].replace(" '", ' "').replace("'", '"')
            elif o.json[k].find("' ") > -1 and o.json[k].startswith("'"):
                o.json[k] = o.json[k].replace("' ", '" ').replace("'", '"')
            # fancify bland quotes
            elif o.json[k].find(' "') > -1 and o.json[k].find('" ') > -1:
                o.json[k] = o.json[k].replace(' "', ' “').replace('" ', '” ')
            elif o.json[k].find(' "') > -1 and o.json[k].endswith('"'):
                o.json[k] = o.json[k].replace(' "', ' “').replace('"', '”')
            elif o.json[k].find('" ') > -1 and o.json[k].startswith('"'):
                o.json[k] = o.json[k].replace('" ', '” ').replace('"', '“')
            # fancify LaTeX quotes
            elif o.json[k].find(' ``') > -1 and o.json[k].find("'' ") > -1:
                o.json[k] = o.json[k].replace("'' ", '” ').replace(' ``', ' “')
            elif o.json[k].find(' ``') > -1 and o.json[k].endswith("''"):
                o.json[k] = o.json[k].replace("''", '”').replace(' ``', ' “')
            elif o.json[k].find("'' ") > -1 and o.json[k].startswith('``'):
                o.json[k] = o.json[k].replace("'' ", '” ').replace('``', '“')
            elif o.json[k].startswith('``') and o.json[k].endswith("''"):
                o.json[k] = '“' + o.json[k][2:-2] + '”'
            # plural possessive
            elif o.json[k].find("'s") > -1:
                o.json[k] = o.json[k].replace("'s", '’s')
            elif o.json[k].find("s' ") > -1:
                o.json[k] = o.json[k].replace("s'", 's’')
            # contractions
            elif o.json[k].find("n't") > -1:
                o.json[k] = o.json[k].replace("n't", 'n’t')
            # the case of "Jr" vs "Jr."
            if k in ('author', 'editor') and o.json[k].endswith('Jr'):
                o.json[k] += '.'
            # TODO: report remaining suspicious activity
            for c in '`"\'':
                # ’ is ok
                if c in o.json[k] and k not in ('author', 'editor'):
                    print('[ {} ] {}: {} is “{}”'.format(
                        C.red('LOOK'), o.getKey(), k, o.json[k]))
                    lookat.append(o.filename)
        elif isinstance(o.json[k], list):
            # inline trivial lists
            if len(o.json[k]) == 1:
                o.json[k] = o.json[k][0]
            # inline hidden trivial lists
            if len(o.json[k]) == 2 and o.json[k][0] == o.json[k][1] \
               and k not in ('stemmed', 'tag', 'tagged'):
                o.json[k] = o.json[k][0]
            # unless it’s 'tagged'
            if k == 'tagged' and not isinstance(o.json[k][0], list):
                o.json[k] = [o.json[k]]
            # remove DBLP disambiguation: we might later regret it
            # but the information can be always re-retrieved
            if k in ('author', 'editor'):
                nas = []
                for a in o.json[k]:
                    # double spaces (same BUG FIX as above: collapse two
                    # spaces into one, not a no-op single-space replace)
                    if a.find('  ') > -1:
                        a = a.replace('  ', ' ').strip()
                    ws = a.split(' ')
                    if ws[-1].isdigit():
                        ws = ws[:-1]
                    nas.append(' '.join(ws))
                o.json[k] = nas
                # the case of "Jr" vs "Jr."
                o.json[k] = [a + '.' if a.endswith(' Jr') else a for a in o.json[k]]
    nlines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
    if plines != nlines:
        # BUG FIX: write explicitly as UTF-8 (the sibling variant does); this
        # function emits em-dashes and curly quotes that the locale default
        # encoding may not be able to represent
        f = open(fn, 'w', encoding='utf-8')
        f.write(o.getJSON())
        f.close()
        return 2
    else:
        return 0
def checkon(fn, o):
    """Tag one entry: match its title against every tag definition, apply
    relief rules, merge the result into o.tags and rewrite the file when
    anything changed.

    Returns 0 (unchanged), 1 (disk/model mismatch) or 2 (file rewritten).
    """
    if os.path.isdir(fn):
        fn = fn + '.json'
    # BUG FIX: read/write explicitly as UTF-8 (the identical sibling variant
    # does); entries contain non-ASCII punctuation
    f = open(fn, 'r', encoding='utf-8')
    lines = f.readlines()[1:-1]
    f.close()
    flines = [strictstrip(s) for s in lines]
    plines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
    if flines != plines:
        return 1
    ts = []
    # precise case-sensitive match
    mcs = o.get('title')
    # precise match for substrings
    mes = baretext(mcs)
    # precise match for words
    mew = mes.split(' ')
    # imprecise match for substrings
    mis = superbaretext(mes)
    # imprecise match for words
    miw = mis.split(' ')
    # now match!
    for t in tags:
        # print('Check',t,'vs',mes)
        if 'name' not in t.keys():
            print(C.red('ERROR:'), 'no name for tag from file', t['FILE'])
            continue
        if all([not k.startswith('match') for k in t.keys()]):
            print(C.red('ERROR:'), 'no match rules for tag', t['name'])
            continue
        for k in t.keys():
            if k == 'matchentry':
                # matchentry lists entry keys verbatim
                if o.getKey() in t[k]:
                    ts += [t['name']]
            elif k.startswith('match'):
                # dispatch on the match mode named by the key
                ts += [t['name'] for s in listify(t[k])
                       if matchModes[k](s, mcs, mes, mew, mis, miw)]
                # ts += [t['name'] for s in listify(t[k]) if fmm(t, k, s, mcs, mes, mew, mis, miw)]
    # second pass: check reliefs
    for t in tags:
        if 'relieves' in t.keys():
            for r in listify(t['relieves']):
                if t['name'] in ts and r in ts:
                    ts.remove(r)
                    if t['name'] not in relieved.keys():
                        relieved[t['name']] = 0
                    relieved[t['name']] += 1
    if ts:
        if not o.tags:
            o.tags = []
        for t in ts:
            if t not in o.tags:
                o.tags.append(t)
        # uncomment the following one line to overwrite all tags
        o.tags = uniq(ts)
        # let’s keep tags clean and sorted
        o.tags = sorted(o.tags)
    nlines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
    if plines != nlines:
        f = open(fn, 'w', encoding='utf-8')
        f.write(o.getJSON())
        f.close()
        return 2
    else:
        return 0