def _write_pairs(path, pairs):
    """Write each ``(first, second)`` pair to *path* as one tab-separated line.

    The file is opened as UTF-8 inside a ``with`` block so it is flushed and
    closed even if a write fails. An empty *pairs* still creates the file.
    """
    from io import open as uopen

    with uopen(path, mode='w', encoding='utf-8') as out:
        for first, second in pairs:
            out.write(u'{}\t{}\n'.format(first, second))


def main():
    """Extract column pairs from a tab-separated file and write random splits.

    Command-line arguments:
        fname       path of the UTF-8, tab-separated input file.
        idx         column paired with column 0 in the output (default 2).
        --key       value a row must have in column ``--key-idx`` to be kept.
        --shuffle   shuffle the selected pairs before splitting.
        --folds     number of ShuffleSplit iterations (default 10).
        --lang      language tag used in the output directory name.
        --key-idx   column compared against ``--key`` (default 3).

    For every split ``i`` the files ``train.uniq``, ``dev.uniq`` and
    ``test.uniq`` are written under ``res/ryan_splits/<lang>-10fold/<i>/``.
    The held-out 20% is divided by position: even positions go to dev, odd
    positions go to test.
    """
    import argparse
    import os
    from io import open as uopen

    parser = argparse.ArgumentParser()
    parser.add_argument('fname')
    # argparse ignores ``default`` on a required positional; nargs='?' makes
    # the declared default of 2 effective while still accepting an explicit
    # value exactly as before.
    parser.add_argument('idx', nargs='?', default=2, type=int)
    parser.add_argument('--key', default=u'V;1;SG;IND;PST;PFV')
    parser.add_argument('--shuffle', action='store_true')
    parser.add_argument('--folds', default=10, type=int)
    parser.add_argument('--lang', default='sp')
    parser.add_argument('--key-idx', default=3, type=int)
    args = parser.parse_args()

    with uopen(args.fname, encoding='utf-8') as fh:
        # Blank lines carry no columns and would raise IndexError below.
        rows = [line.strip().split(u'\t') for line in fh if line.strip()]
    to_extract = [(row[0], row[args.idx])
                  for row in rows if row[args.key_idx] == args.key]

    if args.shuffle:
        from random import shuffle
        shuffle(to_extract)

    from sklearn.model_selection import ShuffleSplit

    rs = ShuffleSplit(n_splits=args.folds, test_size=0.2, random_state=42)
    for i, (train_indices, test_indices) in enumerate(rs.split(to_extract)):
        # NOTE: the directory name says "10fold" regardless of --folds; it is
        # kept unchanged so existing output locations do not move.
        out_dir = 'res/ryan_splits/{}-10fold/{}'.format(args.lang, i)
        # os.makedirs replaces distutils.dir_util.mkpath (distutils is gone
        # from the standard library in Python 3.12+).
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        _write_pairs(out_dir + '/train.uniq',
                     (to_extract[idx] for idx in train_indices))
        # Even positions of the held-out indices -> dev, odd positions -> test.
        _write_pairs(out_dir + '/dev.uniq',
                     (to_extract[idx] for idx in test_indices[0::2]))
        _write_pairs(out_dir + '/test.uniq',
                     (to_extract[idx] for idx in test_indices[1::2]))
from setuptools import setup
import setuptools.command.install
import setuptools.command.develop
import os
import sys

# Pick an open() that accepts ``encoding=``: io.open on Python 2, the builtin
# open on Python 3.
if sys.version_info < (3, 0):
    from io import open as uopen
else:
    uopen = open

# Directory containing this script.
here = os.path.abspath(os.path.dirname(__file__))

# Read README.rst (located next to this script) as UTF-8 text into ``readme``.
readme_rst = os.path.join(here, 'README.rst')
with uopen(readme_rst, encoding='utf-8') as f:
    readme = f.read()

# Execute about.py with ``about`` as its namespace, so the names it defines at
# top level become keys of the ``about`` dict.
about = {}
about_py = os.path.join(here, 'about.py')
with open(about_py) as f:
    exec(f.read(), about)


# Subclass of the setuptools ``develop`` command.
class develop(setuptools.command.develop.develop):
    # Ensures the ``sconstool/cxxtest`` directory exists next to this script,
    # then iterates over ``sources``.
    # NOTE(review): the loop body lies beyond the visible chunk; presumably it
    # creates one symlink per source using ``reldir`` -- confirm.
    def _make_symlinks(self, sources):
        here = os.path.abspath(os.path.dirname(__file__))
        subdir = os.path.join(here, 'sconstool', 'cxxtest')
        # Relative path that climbs two directory levels (``../..`` on POSIX).
        reldir = os.path.join(os.path.pardir, os.path.pardir)
        if not os.path.exists(subdir):
            os.makedirs(subdir)
        for source in sources:
# Copy the version from the Chromium manifest and rewrite the WebExtension
# manifest file (2-space indent, sorted keys, trailing newline).
webext_manifest['version'] = chromium_manifest['version']
with open(webext_manifest_file, 'w') as f2:
    json.dump(webext_manifest, f2, indent=2, separators=(',', ': '), sort_keys=True)
    f2.write('\n')

# Legacy part
# Collect the short extension description of every locale directory, keyed by
# the directory name with '_' replaced by '-'.
descriptions = OrderedDict({})
source_locale_dir = os.path.join(build_dir, 'webextension', '_locales')
for alpha2 in sorted(os.listdir(source_locale_dir)):
    locale_path = os.path.join(source_locale_dir, alpha2, 'messages.json')
    with uopen(locale_path, encoding='utf-8') as f:
        strings = json.load(f, object_pairs_hook=OrderedDict)
    alpha2 = alpha2.replace('_', '-')
    descriptions[alpha2] = strings['extShortDesc']['message']

webext_manifest['author'] = chromium_manifest['author']
webext_manifest['name'] = chromium_manifest['name'] + '/webext-hybrid'
webext_manifest['homepage'] = 'https://github.com/gorhill/uBlock'
# The English text becomes the main description and is removed from the
# per-locale mapping.
webext_manifest['description'] = descriptions['en']
del descriptions['en']

# A four-component version "A.B.C.D" is split into "A.B.C" and ".D".
# The pattern is a raw string so that ``\d`` and ``\.`` reach the regex engine
# without triggering Python's invalid-escape-sequence warning; the pattern
# text itself is unchanged.
match = re.search(r'^(\d+\.\d+\.\d+)(\.\d+)$', chromium_manifest['version'])
if match:
    # Fourth component as an integer (leading '.' stripped).
    buildtype = int(match.group(2)[1:])
    if buildtype < 100:
        # NOTE(review): ``builttype`` (ending in "tt") is a different name from
        # ``buildtype``; its use lies beyond the visible chunk -- confirm the
        # distinct spelling is intentional.
        builttype = 'b' + str(buildtype)
# Load the WebExtension manifest from the build directory.
webext_manifest_file = os.path.join(build_dir, 'webextension', 'manifest.json')
with open(webext_manifest_file) as f2:
    webext_manifest = json.load(f2)

# Copy the version from the Chromium manifest and write the manifest back
# (2-space indent, sorted keys, trailing newline).
webext_manifest['version'] = chromium_manifest['version']
with open(webext_manifest_file, 'w') as f2:
    json.dump(webext_manifest, f2, indent=2, separators=(',', ': '), sort_keys=True)
    f2.write('\n')

# Collect the short extension description of every locale directory, keyed by
# the directory name with '_' replaced by '-'.
descriptions = OrderedDict({})
source_locale_dir = os.path.join(build_dir, 'webextension', '_locales')
for alpha2 in sorted(os.listdir(source_locale_dir)):
    locale_path = os.path.join(source_locale_dir, alpha2, 'messages.json')
    with uopen(locale_path, encoding='utf-8') as f:
        strings = json.load(f, object_pairs_hook=OrderedDict)
    alpha2 = alpha2.replace('_', '-')
    descriptions[alpha2] = strings['extShortDesc']['message']

webext_manifest['author'] = chromium_manifest['author'];
webext_manifest['name'] = chromium_manifest['name'] + '/embed-webext';
webext_manifest['homepage'] = 'https://github.com/uBlockAdmin/uBlock'
# The English text becomes the main description and is removed from the
# per-locale mapping.
webext_manifest['description'] = descriptions['en']
del descriptions['en']

webext_manifest['localized'] = []
# ``t`` is a whitespace unit and ``t3`` three of them.
# NOTE(review): presumably used to indent generated text further down -- the
# use is beyond the visible chunk; confirm.
t = ' '
t3 = 3 * t
for alpha2 in descriptions:
    # 'en' was deleted from ``descriptions`` above, so this guard is not
    # expected to trigger here.
    if alpha2 == 'en':
        continue
from setuptools import setup
import setuptools.command.install
import setuptools.command.develop
import os
import sys

# Pick an open() that accepts ``encoding=``: io.open on Python 2, the builtin
# open on Python 3.
if sys.version_info < (3, 0):
    from io import open as uopen
else:
    uopen = open

# Directory containing this script.
here = os.path.abspath(os.path.dirname(__file__))

# Read README.rst (located next to this script) as UTF-8 text into ``readme``.
readme_rst = os.path.join(here, 'README.rst')
with uopen(readme_rst, encoding='utf-8') as f:
    readme = f.read()

# Execute about.py with ``about`` as its namespace, so the names it defines at
# top level become keys of the ``about`` dict.
about = {}
about_py = os.path.join(here, 'about.py')
with open(about_py) as f:
    exec(f.read(), about)


# Subclass of the setuptools ``develop`` command.
class develop(setuptools.command.develop.develop):
    # Ensures the ``sconstool/texas`` directory exists next to this script,
    # then iterates over ``sources``.
    # NOTE(review): the loop body lies beyond the visible chunk; presumably it
    # creates one symlink per source using ``reldir`` -- confirm.
    def _make_symlinks(self, sources):
        here = os.path.abspath(os.path.dirname(__file__))
        subdir = os.path.join(here, 'sconstool', 'texas')
        # Relative path that climbs two directory levels (``../..`` on POSIX).
        reldir = os.path.join(os.path.pardir, os.path.pardir)
        if not os.path.exists(subdir):
            os.makedirs(subdir)
        for source in sources: