Ejemplo n.º 1
0
def main():
    """Extract column pairs from a TSV file and write shuffled train/dev/test splits.

    Command-line arguments:
        fname      Path to a UTF-8, tab-separated input file.
        idx        Column paired with column 0 in the output (default 2).
        --key      Value a row must have in column ``--key-idx`` to be kept.
        --shuffle  Shuffle the selected rows before splitting.
        --folds    Number of ShuffleSplit iterations (default 10).
        --lang     Language tag used in the output directory name.
        --key-idx  Column compared against ``--key`` (default 3).

    For every split ``i`` the files ``train.uniq``, ``dev.uniq`` and
    ``test.uniq`` are written under ``res/ryan_splits/<lang>-10fold/<i>/``.
    The held-out 20% is divided alternately between dev and test.

    Raises:
        IndexError: if a row has fewer columns than ``idx`` / ``--key-idx``
            require.
    """
    import argparse
    import os
    from io import open as uopen

    parser = argparse.ArgumentParser()
    parser.add_argument('fname')
    # A positional argument only honours ``default`` when it is optional,
    # hence nargs='?'; passing idx explicitly keeps working as before.
    parser.add_argument('idx', nargs='?', default=2, type=int)
    parser.add_argument('--key', default=u'V;1;SG;IND;PST;PFV')
    parser.add_argument('--shuffle', action='store_true')
    parser.add_argument('--folds', default=10, type=int)
    parser.add_argument('--lang', default='sp')
    parser.add_argument('--key-idx', default=3, type=int)
    args = parser.parse_args()

    # Close the input as soon as it has been read.
    with uopen(args.fname, encoding='utf-8') as fh:
        lines = [x.strip().split(u'\t') for x in fh]
    to_extract = [(x[0], x[args.idx]) for x in lines if x[args.key_idx] == args.key]
    if args.shuffle:
        from random import shuffle
        shuffle(to_extract)

    from sklearn.model_selection import ShuffleSplit
    rs = ShuffleSplit(n_splits=args.folds, test_size=0.2, random_state=42)
    for i, (train_indices, test_indices) in enumerate(rs.split(to_extract)):
        # NOTE: the directory name says "10fold" regardless of --folds; kept
        # as-is so existing consumers of these paths are not broken.
        fold_dir = 'res/ryan_splits/{}-10fold/{}'.format(args.lang, i)
        # distutils (and its mkpath) is gone in Python 3.12+; os.makedirs
        # creates the intermediate directories just the same.
        if not os.path.isdir(fold_dir):
            os.makedirs(fold_dir)

        # The context manager guarantees all three files are flushed and
        # closed before the next fold starts, even if a write fails.
        with uopen(fold_dir + '/train.uniq', mode='w', encoding='utf-8') as train_fh, \
                uopen(fold_dir + '/dev.uniq', mode='w', encoding='utf-8') as dev_fh, \
                uopen(fold_dir + '/test.uniq', mode='w', encoding='utf-8') as test_fh:
            for idx in train_indices:
                train_fh.write(u'{}\t{}\n'.format(to_extract[idx][0], to_extract[idx][1]))

            # Even positions of the held-out set go to dev, odd ones to test.
            for j, idx in enumerate(test_indices):
                if j % 2 == 0:
                    dev_fh.write(u'{}\t{}\n'.format(to_extract[idx][0], to_extract[idx][1]))
                else:
                    test_fh.write(u'{}\t{}\n'.format(to_extract[idx][0], to_extract[idx][1]))
Ejemplo n.º 2
0
from setuptools import setup
import setuptools.command.install
import setuptools.command.develop
import os
import sys

# Pick an ``open`` that accepts the ``encoding`` keyword: the builtin on
# Python 3, io.open on anything older.
if sys.version_info >= (3, 0):
    uopen = open
else:
    from io import open as uopen

# Directory containing this script; the files below are resolved against it.
here = os.path.abspath(os.path.dirname(__file__))

# Load README.rst as UTF-8 text into ``readme``.
readme_rst = os.path.join(here, 'README.rst')
with uopen(readme_rst, encoding='utf-8') as f:
    readme = f.read()

# Execute about.py with ``about`` as its globals, so every top-level name it
# defines ends up as a key of the dict.
about = {}
about_py = os.path.join(here, 'about.py')
with open(about_py) as f:
    exec(f.read(), about)

class develop(setuptools.command.develop.develop):
    def _make_symlinks(self, sources):
        here = os.path.abspath(os.path.dirname(__file__))
        subdir = os.path.join(here, 'sconstool', 'cxxtest')
        reldir = os.path.join(os.path.pardir, os.path.pardir)
        if not os.path.exists(subdir):
            os.makedirs(subdir)
        for source in sources:
Ejemplo n.º 3
0
# Carry the Chromium manifest's version over to the webextension manifest.
webext_manifest['version'] = chromium_manifest['version']

# Rewrite the manifest file: 2-space indent, sorted keys, trailing newline.
with open(webext_manifest_file, 'w') as f2:
    json.dump(webext_manifest, f2, indent=2, separators=(',', ': '), sort_keys=True)
    f2.write('\n')

# Legacy part
# Collect the short description of every locale directory, visited in sorted
# order, keyed by the directory name with '_' replaced by '-'.
descriptions = OrderedDict()
source_locale_dir = os.path.join(build_dir, 'webextension', '_locales')
for alpha2 in sorted(os.listdir(source_locale_dir)):
    locale_path = os.path.join(source_locale_dir, alpha2, 'messages.json')
    with uopen(locale_path, encoding='utf-8') as f:
        strings = json.load(f, object_pairs_hook=OrderedDict)
    alpha2 = alpha2.replace('_', '-')
    descriptions[alpha2] = strings['extShortDesc']['message']

webext_manifest['author'] = chromium_manifest['author']
webext_manifest['name'] = chromium_manifest['name'] + '/webext-hybrid'
webext_manifest['homepage'] = 'https://github.com/gorhill/uBlock'
# The English text becomes the main description and is taken out of the
# per-locale mapping in the same step.
webext_manifest['description'] = descriptions.pop('en')

# Match a four-component version ("a.b.c.d"): group 1 is the first three
# components, group 2 the trailing ".d".
# The pattern is a raw string so that "\d" and "\." reach the regex engine
# verbatim instead of being parsed as (invalid) string escape sequences.
match = re.search(r'^(\d+\.\d+\.\d+)(\.\d+)$', chromium_manifest['version'])
if match:
    # Strip the leading dot from group 2 before converting to an integer.
    buildtype = int(match.group(2)[1:])
    if buildtype < 100:
        builttype = 'b' + str(buildtype)
Ejemplo n.º 4
0
# Load the webextension manifest that lives inside the build directory.
webext_manifest_file = os.path.join(build_dir, 'webextension', 'manifest.json')
with open(webext_manifest_file) as f2:
    webext_manifest = json.load(f2)

# Carry the Chromium manifest's version over, then write the file back with
# 2-space indent, sorted keys and a trailing newline.
webext_manifest['version'] = chromium_manifest['version']

with open(webext_manifest_file, 'w') as f2:
    json.dump(
        webext_manifest,
        f2,
        indent=2,
        separators=(',', ': '),
        sort_keys=True,
    )
    f2.write('\n')

# Collect the short description of every locale directory, visited in sorted
# order, keyed by the directory name with '_' replaced by '-'.
descriptions = OrderedDict()
source_locale_dir = os.path.join(build_dir, 'webextension', '_locales')
for alpha2 in sorted(os.listdir(source_locale_dir)):
    locale_path = os.path.join(source_locale_dir, alpha2, 'messages.json')
    with uopen(locale_path, encoding='utf-8') as f:
        strings = json.load(f, object_pairs_hook=OrderedDict)
    alpha2 = alpha2.replace('_', '-')
    descriptions[alpha2] = strings['extShortDesc']['message']

webext_manifest['author'] = chromium_manifest['author']
webext_manifest['name'] = chromium_manifest['name'] + '/embed-webext'
webext_manifest['homepage'] = 'https://github.com/uBlockAdmin/uBlock'
# The English text becomes the main description and is taken out of the
# per-locale mapping in the same step.
webext_manifest['description'] = descriptions.pop('en')
webext_manifest['localized'] = []
# Indentation units: one level and three levels.
t = '    '
t3 = t * 3
for alpha2 in descriptions:
    if alpha2 == 'en':
        continue
Ejemplo n.º 5
0
from setuptools import setup
import setuptools.command.install
import setuptools.command.develop
import os
import sys

# The builtin open takes ``encoding`` on Python 3; older interpreters get
# io.open under the same name.
if sys.version_info >= (3, 0):
    uopen = open
else:
    from io import open as uopen

# Absolute path of the directory that holds this script.
here = os.path.abspath(os.path.dirname(__file__))

# README.rst, decoded as UTF-8, is kept in ``readme``.
readme_rst = os.path.join(here, 'README.rst')
with uopen(readme_rst, encoding='utf-8') as f:
    readme = f.read()

# Run about.py with the ``about`` dict as its global namespace so the names
# it assigns become entries of that dict.
about = {}
about_py = os.path.join(here, 'about.py')
with open(about_py) as f:
    exec(f.read(), about)

class develop(setuptools.command.develop.develop):
    def _make_symlinks(self, sources):
        here = os.path.abspath(os.path.dirname(__file__))
        subdir = os.path.join(here, 'sconstool', 'texas')
        reldir = os.path.join(os.path.pardir, os.path.pardir)
        if not os.path.exists(subdir):
            os.makedirs(subdir)
        for source in sources: