def setup(self):
     self.xform_writer = csv.writer(open(XFORM_FILENAME, 'w+b'))
     self.xform_writer.writerow(XFORM_HEADER)
     self.case_writer = csv.writer(open(CASE_FILE_NAME, 'w+b'))
     self.case_writer.writerow(CASE_HEADER)
     self.forms_accessor = FormAccessors(self.domain)
     self.case_accessors = CaseAccessors(self.domain)
Example #2
def track_users(ids):
    """
    Track users by id, writing to jsons folder.
    """
    print 'tracking', len(ids), 'users'
    outf = io.open(make_output_file(), mode='wt', encoding='utf8')
    count = 0
    for tweet in twutil.collect.track_user_ids(ids):
        try:
            outf.write(json.dumps(tweet, ensure_ascii=False, encoding='utf8'))
            outf.write(u'\n')
            outf.flush()
            count += 1
            if count > 100000:
                outf.close()
                outf = io.open(make_output_file(), mode='wt', encoding='utf8')
                count = 0
        except:
            e = sys.exc_info()
            print 'skipping error', e[0]
            print traceback.format_exc()
            twutil.collect.reinit()
            outf.close()
            track_users(ids)

    outf.close()
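# Usage sketch (not part of the original snippet): assumes the twutil/io/json/sys/traceback
# imports and a make_output_file() helper as used above; the ids below are placeholders.
track_users(['12345', '67890'])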
Example #3
def main(dir):
    h = MyHTMLParser()

    getcharset_pattern = r'(?i)content=.*?charset=(.*?)["\']'
    for currentpath, folders, files in os.walk(dir):
        for f in files:
            if f.endswith('.html'):
                filename = os.path.join(currentpath, f)

                fr = open(filename, 'r')
                fc = fr.read()
                finds = re.findall(getcharset_pattern, fc)
                if len(finds)>0:
                    htmlCharset = finds[0]
                else:
                    htmlCharset =''
                if htmlCharset == 'gb2312':
                    fc = re.sub(
                        getcharset_pattern, 'content="text/html; charset=utf-8"', fc)
                    shutil.move(filename, filename+"~")
                    with io.open(filename, 'w', encoding='utf-8') as fw:
                        fw.write(fc.decode('gbk'))
                elif htmlCharset == 'utf-8':
                    pass
                elif htmlCharset == 'iso-8859-1':
                    fc = io.open(filename, 'r', encoding='iso-8859-1').read()
                    fc = re.sub(
                        getcharset_pattern, 'content="text/html; charset=utf-8"', fc)
                    shutil.move(filename, filename+"~")
                    with io.open(filename, 'w', encoding='utf-8') as fw:
                        fw.write(h.unescape(fc))
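# Usage sketch (not part of the original snippet): run the charset fixer over a directory of
# .html files; the directory name is a placeholder.
if __name__ == '__main__':
    main('legacy_site_html')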
    def stage_data(self):

        # Write script
        remote_script = os.path.join(self.config.solver_dir, "process_synthetics.py")
        with io.open(utilities.get_script_file("process_synthetics"), "r") as fh:
            script_string = fh.readlines()
        script_string.insert(0, "#!{}\n".format(self.config.python_exec))
        self.remote_machine.write_file(remote_script, "".join(script_string))

        # Copy over pickle file.
        info = {"lowpass": self.iteration_info["lowpass"],
                "highpass": self.iteration_info["highpass"],
                "event_list": self.event_info.keys()}
        tmp_pickle = "tmp_pickle.p"
        remote_pickle = os.path.join(self.config.solver_dir, "info.p")
        with io.open(tmp_pickle, "wb") as fh:
            cPickle.dump(info, fh)
        self.remote_machine.put_file(tmp_pickle, remote_pickle)
        os.remove(tmp_pickle)

        # Copy sbatch file.
        remote_sbatch = os.path.join(self.config.solver_dir, "process_synthetics.sbatch")
        with io.open(utilities.get_template_file("sbatch"), "r") as fh:
            sbatch_string = fh.read().format(**self.sbatch_dict)
        self.remote_machine.write_file(remote_sbatch, sbatch_string)
Example #5
def newoverwrite(s, filename, verbose=False):
    """Useful for not forcing re-compiles and thus playing nicely with the
    build system.  This is acomplished by not writing the file if the existsing
    contents are exactly the same as what would be written out.

    Parameters
    ----------
    s : str
        string contents of file to possible
    filename : str
        Path to file.
    vebose : bool, optional
        prints extra message

    """
    if os.path.isfile(filename):
        with io.open(filename, 'rb') as f:
            old = f.read()
        if s == old:
            return
    else:
        dirname = os.path.dirname(filename)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
    with io.open(filename, 'wb') as f:
        f.write(s.encode())
    if verbose:
        print("  wrote " + filename)
def test_can_report_when_system_locale_is_ascii(monkeypatch):
    import io
    read, write = os.pipe()
    read = io.open(read, 'r', encoding='ascii')
    write = io.open(write, 'w', encoding='ascii')
    monkeypatch.setattr(sys, 'stdout', write)
    reporting.default(u"☃")
    def test_freezer_nested(self):
        cfg = self.given_a_file_in_test_dir('buildout.cfg', '''\
[buildout]
extends= http://example.com/buildout.cfg
''')
        expected1 = '''\
[buildout]
extends= external_buildouts/example.com_buildout.cfg
'''
        expected2 = '''\
# File managed by freeze command from buildout_helpers
# Changes will be overwritten
# ETAG: None
# ORIGIN: http://example.com/buildout.cfg
[buildout]
extends= example.com_buildout2.cfg
'''
        with requests_mock.mock() as m:
            m.get('http://example.com/buildout.cfg', text='''[buildout]
extends= buildout2.cfg
''')
            m.get('http://example.com/buildout2.cfg', text='''[buildout]''')
            freeze(Config(cfg))

        abs_dir, _ = os.path.split(cfg)
        new_file_contents = open(os.path.join(abs_dir,
                                              'external_buildouts',
                                              'example.com_buildout.cfg'),
                                 'r').read()
        old_file_contents = open(cfg, 'r').read()
        self.assertEqual(old_file_contents, expected1)
        self.assertEqual(new_file_contents, expected2)
    def stemminig(self, doc):

        doc1 = doc

        suffFileDirec = "suffixes.txt"
        try:
            suffixFile = io.open(suffFileDirec, "r", encoding='utf-8').read()
        except UnicodeDecodeError:
            suffixFile = io.open(suffFileDirec, "r", encoding='latin-1').read()

        suffixList = suffixFile.split()

        doc.sort()
        wordList = doc

        i = 0
        while (i < len(wordList) - 1):
            j = i + 1

            while (j < len(wordList)):

                benchWord = wordList[i]
                checkWord = wordList[j]
                benchCharList = list(benchWord)
                checkCharList = list(checkWord)

                if (checkWord.startswith(benchWord)):
                    for suffix in suffixList:
                        if checkWord.endswith(suffix):
                            print(benchWord+" = "+checkWord)
                            wordList[j] = benchWord
                            break
                j += 1
            i += 1
def test_install_pre_commit(tmpdir_factory):
    path = git_dir(tmpdir_factory)
    runner = Runner(path)
    ret = install(runner)
    assert ret == 0
    assert os.path.exists(runner.pre_commit_path)
    pre_commit_contents = io.open(runner.pre_commit_path).read()
    pre_commit_script = resource_filename('hook-tmpl')
    expected_contents = io.open(pre_commit_script).read().format(
        sys_executable=sys.executable,
        hook_type='pre-commit',
        pre_push=''
    )
    assert pre_commit_contents == expected_contents
    assert os.access(runner.pre_commit_path, os.X_OK)

    ret = install(runner, hook_type='pre-push')
    assert ret == 0
    assert os.path.exists(runner.pre_push_path)
    pre_push_contents = io.open(runner.pre_push_path).read()
    pre_push_tmpl = resource_filename('pre-push-tmpl')
    pre_push_template_contents = io.open(pre_push_tmpl).read()
    expected_contents = io.open(pre_commit_script).read().format(
        sys_executable=sys.executable,
        hook_type='pre-push',
        pre_push=pre_push_template_contents,
    )
    assert pre_push_contents == expected_contents
Example #10
    def export(self, ole, export_path):
        if not self.safe_makedir(export_path):
            return

        for stream in ole.listdir(streams=True, storages=True):
            try:
                stream_content = ole.openstream(stream).read()
            except Exception as e:
                self.log('warning', "Unable to open stream {0}: {1}".format(string_clean('/'.join(stream)), e))
                continue

            store_path = os.path.join(export_path, string_clean('-'.join(stream)))

            flash_objects = self.detect_flash(stream_content)  # reuse the bytes already read above
            if len(flash_objects) > 0:
                self.log('info', "Saving Flash objects...")
                count = 1

                for header, flash_object in flash_objects:
                    self.print_swf_header_info(header)
                    save_path = '{0}-FLASH-Decompressed{1}'.format(store_path, count)
                    with open(save_path, 'wb') as flash_out:
                        flash_out.write(flash_object)

                    self.log('item', "Saved Decompressed Flash File to {0}".format(save_path))
                    count += 1

            with open(store_path, 'wb') as out:
                out.write(stream_content)

            self.log('info', "Saved stream to {0}".format(store_path))

        ole.close()
def decrypt(key, ct_path="ciphertext.enc", savePT="plaintext.dec"):
    with open(ct_path, 'rb') as input:
        u = dill.load(input)
        ciphertext = dill.load(input)
    v = modexp(u, key.x, key.p)

    uv = str(u)+str(v)
    k = SHA224.new(uv.encode('utf-8')).hexdigest().encode('utf-8')  # symmetric key for the Blowfish cipher below
    print("K: "+str(k))

    bs = Blowfish.block_size
    iv = ciphertext[:bs]
    # Remove IV
    ciphertext = ciphertext[bs:]
    print("CT-LEN:"+str(len(ciphertext)))
    cipher = Blowfish.new(k, Blowfish.MODE_CBC, iv)
    plaintext = cipher.decrypt(ciphertext)
    # Remove padding
    last_byte = plaintext[-1]
    plaintext = plaintext[:- (last_byte if type(last_byte) is int else ord(last_byte))]

    # Write the decrypted plaintext to a file
    #plaintext = plaintext.decode(plaintext, key.iNumBits)
    with io.open(savePT, "wb") as out_file:
        out_file.write(plaintext)

    return plaintext
def merge_data_from_crawl():  # TODO: refactor, far too long
    category_file = sorted(os.listdir(os.getcwd()+"/data/data_from_crawl"))
    movies_category = [file_name for file_name in category_file if not any(c.isdigit() for c in file_name)]
    for category in movies_category:
        if category.endswith('.json'):
            category = category[:-5]
        if category.endswith('die'):
            movies_with_part = [file_name for file_name in category_file if (len(file_name) < 30 and category in file_name)]
        else:
            movies_with_part = [file_name for file_name in category_file if category in file_name]
        if len(movies_with_part) > 1:
            all_part_data = {}
            for movies_part in movies_with_part:
                with open('data/data_from_crawl/' + movies_part) as part:
                    data = json.load(part)
                for movie_title, movie_data in data.iteritems():
                    all_part_data[movie_title] = movie_data
            with io.open("data/data_to_use/" + movies_with_part[0] , "w+", encoding='utf8') as all_part:
                output = json.dumps(all_part_data, ensure_ascii=False, encoding='utf8')
                all_part.write(unicode(output))
        if len(movies_with_part) == 1 and  movies_with_part[0][0] != '.':
            with open('data/data_from_crawl/' + movies_with_part[0]) as part:
                data = json.load(part)
            with io.open("data/data_to_use/" + movies_with_part[0] , "w+", encoding='utf8') as all_part:
                output = json.dumps(data, ensure_ascii=False, encoding='utf8')
                all_part.write(unicode(output))
Example #13
    def __init__(self, sensor_name, bus, addr=_DEFAULT_ADDRESS):
        '''Initializes the sensor with some default values.

        bus: The SMBus descriptor on which this sensor is attached.
        addr: The I2C bus address
            (default is 0x40).

        '''
        
        SensorBase.__init__(
            self, 
            sensor_name=sensor_name)
        
        self._ior = io.open('/dev/i2c-' + str(bus), 'rb', buffering=0)
        self._iow = io.open('/dev/i2c-' + str(bus), 'wb', buffering=0)
        fcntl.ioctl(self._ior, _I2C_SLAVE, addr)
        fcntl.ioctl(self._iow, _I2C_SLAVE, addr)
        
        self._resolution = RESOLUTION_12BITS
        self._onchip_heater = _DISABLE_ONCHIP_HEATER
        self._otp_reload = _DISABLE_OTP_RELOAD

        self._use_temperature = True

        self._reset()
        self._reconfigure()
def edit_changes(file, data, msg="Update to version [% VERSION_DEB %]"):
  """
    Prepend the most simplistic but syntactically correct RPM-style changes file entry.
    No changelog body text.
    If an identical entry (except for the timestamp) is already there,
    we just update the timestamp.
  """
  entry_fmt = """-------------------------------------------------------------------
[% DATE_RPM %] - [% MAINTAINER_EMAIL %]

- """+msg+"""

"""
  entry = subst_variables(entry_fmt, data)

  txt = ''
  if os.path.isfile(file):
    txt = open(file, encoding="latin-1").read()

  txt2 = re.sub(r'^.*\n','', txt)
  txt2 = re.sub(r'^.*\n','', txt2)	# cut away first 2 lines.
  ent2 = re.sub(r'^.*\n','', entry)
  ent2 = re.sub(r'^.*\n','', ent2)	# cut away first 2 lines.
  if txt2.startswith(ent2):
    txt = txt2[len(ent2):]
  txt = entry + txt
  out=open(file, "w", encoding="latin-1")
  out.write(txt)
  out.close()
Example #15
def batch_sort(input, output, buffer_size=32000, tempdirs=None):
    if tempdirs is None:
        tempdirs = []
    if not tempdirs:
        tempdirs.append(gettempdir())

    chunks = []
    try:
        with io.open(input,mode='r',buffering=64*1024, encoding='utf8') as input_file:
            print(u"Opened input {0}".format(input))
            input_iterator = iter(input_file)
            for tempdir in cycle(tempdirs):
                current_chunk = list(islice(input_iterator,buffer_size))
                if not current_chunk:
                    break
                current_chunk.sort(key=keyfunc)
                fname = '%06i' % len(chunks)
                output_chunk = io.open(os.path.join(tempdir,fname),mode='w+',buffering=64*1024, encoding='utf8')
                print(u"Writing tempfile {0}/{1}".format(tempdir, fname))
                chunks.append(output_chunk)
                output_chunk.writelines(current_chunk)
                output_chunk.flush()
                output_chunk.seek(0)
        print(u"Writing outfile {0}".format(output))
        with io.open(output,mode='w',buffering=64*1024, encoding='utf8') as output_file:
            output_file.writelines(merge(*chunks))
    finally:
        for chunk in chunks:
            try:
                chunk.close()
                os.remove(chunk.name)
            except Exception:
                print(u"Exception when closing chunk")
                pass
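# Usage sketch (not part of the original snippet): keyfunc is a module-level name the function
# relies on but does not define, so an identity key is assumed here; file names are placeholders.
keyfunc = lambda line: line
batch_sort('unsorted_lines.txt', 'sorted_lines.txt', buffer_size=10000)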
Example #16
def main():
    complrepldict=OrderedDict()
    # STEP 1.1: add all multi-word expressions from a dictionary of the language in question ('s ochtends --> 's_ochtends)
    alldutchwords=[line.strip() for line in open(nlwoordenbestand,mode="r",encoding="utf-8")]
    complrepldict.update(replacespaces(alldutchwords))

    # STEP 1.2: add all own rules (column 0 and 1 from user-generated TAB file)
    complrepldict.update(replaceown(ownreplacements,0,1))
    # STEP 1.3: save output to general replacement list
    with open(outputbestand,mode="w",encoding="utf-8") as fo:
        fo.write(unicode(json.dumps(complrepldict,ensure_ascii=False)))
    print "\nFinished writing",outputbestand
    print "YOU'RE READY WITH THE GENERAL REPLACEMENT LIST!\n"

    # STEP 2.1: Add rules that only have to be replaced if already one replacement according to rule above has taken
    # place. Example: Jan Smit is replaced by jan_smit (1.2), now, also subsequent mentions of Smit (without Jan)
    # should be replaced by jan_smit
    complrepldict2=OrderedDict()
    complrepldict2.update(replaceownlistoutput(ownreplacements,2,1))
    # STEP 2.2: save output to second-mention-output file
    with open(outputbestand2,mode="w",encoding="utf-8") as fo:
        fo.write(unicode(json.dumps(complrepldict2,ensure_ascii=False)))
    print "\nFinished writing",outputbestand2
    print "YOU'RE READY WITH THE REPLACEMENT LIST FOR SECOND MENTIONS (e.g., LAST NAMES/FULL NAMES)!\n"


    # STEP 3.1: Add rules for replacements that only have to take place if some other expression occurs in the article
    complrepldict3=OrderedDict()
    complrepldict3.update(replaceownindien(ownreplacements,2,1,3))
    
    with open(outputbestand3,mode="w",encoding="utf-8") as fo:
        fo.write(unicode(json.dumps(complrepldict3,ensure_ascii=False)))
    
    print "\nFinished writing",outputbestand3
    print "YOU'RE READY WITH THE REPLACEMENT LIST FOR REPLACEMENTST IN CASE OF INDICATORS BEING PRESENT!\n"
def edit_debchangelog(file, data, msg="Update to version [% VERSION_DEB %]"):
  """
    Prepend the most simplistic but syntactically correct debian changelog entry.
    No changelog body text.
    If an identical entry (except for the timestamp) is already there,
    we just update the timestamp.
  """
  entry_fmt = """[% PACKNAME %] ([% VERSION_DEB %]-[% BUILDRELEASE_DEB %]) stable; urgency=low

  * """+msg+"""

 -- [% MAINTAINER_NAME %] <[% MAINTAINER_EMAIL %]>  """
  entry = subst_variables(entry_fmt, data)

  txt = ''
  if os.path.isfile(file):
    txt = open(file, encoding="latin-1").read()

  if txt.startswith(entry):
    txt = txt[len(entry):]
    txt = re.sub(r'^.*','', txt)		# zap the timestamp
    txt = re.sub(r'^[\s*\n]*','', txt, flags=re.M)	# zap leading newlines and whitespaces
  entry += data['DATE_DEB'] + "\n\n"
  txt = entry + txt
  out=open(file, "w", encoding="latin-1")
  out.write(txt)
  out.close()
Example #18
def process_ebuild(ebuild, ops, arch_status=None, verbose=0, quiet=0,
                   dry_run=False, style='color-inline', manifest=False):
	"""Process |ops| for |ebuild|

	Args:
	  ebuild: The ebuild file to operate on & update in place
	  ops: An iterable of operations (Op objects) to perform on |ebuild|
	  arch_status: A dict mapping default arches to their stability; see the
	               load_profile_data function for more details
	  verbose: Be verbose; show various status messages
	  quiet: Be quiet; only show errors
	  dry_run: Do not make any changes to |ebuild|; show what would be done
	  style: The diff style
	  manifest: Whether to regenerate the package's Manifest after updating

	Returns:
	  Whether any updates were processed
	"""
	with io.open(ebuild, encoding='utf8') as f:
		updated, content = process_content(
			ebuild, f, ops, arch_status=arch_status,
			verbose=verbose, quiet=quiet, style=style)
		if updated and not dry_run:
			with io.open(ebuild, 'w', encoding='utf8') as f:
				f.writelines(content)
			if manifest:
				subprocess.check_call(['ebuild', ebuild, 'manifest'])
	return updated
Example #19
 def __init__(self):
     super(StreamingHttpServer, self).__init__(
             ('', HTTP_PORT), StreamingHttpHandler)
     with io.open('index.html', 'r') as f:
         self.index_template = f.read()
     with io.open('jsmpg.js', 'r') as f:
         self.jsmpg_content = f.read()
Example #20
 def testBasicIO(self):
     for chunksize in (1, 2, 3, 4, 5, 15, 16, 17, 31, 32, 33, 63, 64, 65):
         for enc in "ascii", "latin1", "utf8" :# , "utf-16-be", "utf-16-le":
             f = io.open(test_support.TESTFN, "w+", encoding=enc)
             f._CHUNK_SIZE = chunksize
             self.assertEquals(f.write(u"abc"), 3)
             f.close()
             f = io.open(test_support.TESTFN, "r+", encoding=enc)
             f._CHUNK_SIZE = chunksize
             self.assertEquals(f.tell(), 0)
             self.assertEquals(f.read(), u"abc")
             cookie = f.tell()
             self.assertEquals(f.seek(0), 0)
             self.assertEquals(f.read(2), u"ab")
             self.assertEquals(f.read(1), u"c")
             self.assertEquals(f.read(1), u"")
             self.assertEquals(f.read(), u"")
             self.assertEquals(f.tell(), cookie)
             self.assertEquals(f.seek(0), 0)
             self.assertEquals(f.seek(0, 2), cookie)
             self.assertEquals(f.write(u"def"), 3)
             self.assertEquals(f.seek(cookie), cookie)
             self.assertEquals(f.read(), u"def")
             if enc.startswith("utf"):
                 self.multi_line_test(f, enc)
             f.close()
Example #21
    def write_manifest(self):
        with open(os.path.join("meta", "def.json"), "r") as tmpl:
            def_json = os.path.join(self.build_dir, "def.json")

            with open(def_json, "wt", encoding="utf-8", newline="\n") as f:
                f.write("\ufeff")
                f.write(re.sub(r"\{(?=[^\w\{])|(?<=[^\w\}])\}", r"\g<0>\g<0>", tmpl.read()).format(**self.config))
Example #22
def execute_nb(src, dst, allow_errors=False, timeout=1000, kernel_name=''):
    """
    Execute notebook in `src` and write the output to `dst`

    Parameters
    ----------
    src, dst: str
        path to notebook
    allow_errors: bool
    timeout: int
    kernel_name: str
        defaults to value set in notebook metadata

    Returns
    -------
    dst: str
    """
    import nbformat
    from nbconvert.preprocessors import ExecutePreprocessor

    with io.open(src, encoding='utf-8') as f:
        nb = nbformat.read(f, as_version=4)

    ep = ExecutePreprocessor(allow_errors=allow_errors,
                             timeout=timeout,
                             kernel_name=kernel_name)
    ep.preprocess(nb, resources={})

    with io.open(dst, 'wt', encoding='utf-8') as f:
        nbformat.write(nb, f)
    return dst
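# Usage sketch (not part of the original snippet): assumes nbformat and nbconvert are installed;
# the notebook paths are placeholders.
executed_path = execute_nb('analysis.ipynb', 'analysis.out.ipynb', timeout=600)
print(executed_path)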
    def test_2to3_user_mode(self, test_env):
        settings = dict(
            name='foo',
            packages=['foo'],
            use_2to3=True,
            version='0.0',
        )
        dist = Distribution(settings)
        dist.script_name = 'setup.py'
        cmd = develop(dist)
        cmd.user = 1
        cmd.ensure_finalized()
        cmd.install_dir = site.USER_SITE
        cmd.user = 1
        with contexts.quiet():
            cmd.run()

        # let's see if we got our egg link at the right place
        content = os.listdir(site.USER_SITE)
        content.sort()
        assert content == ['easy-install.pth', 'foo.egg-link']

        # Check that we are using the right code.
        fn = os.path.join(site.USER_SITE, 'foo.egg-link')
        with io.open(fn) as egg_link_file:
            path = egg_link_file.read().split()[0].strip()
        fn = os.path.join(path, 'foo', '__init__.py')
        with io.open(fn) as init_file:
            init = init_file.read().strip()

        expected = 'print("foo")' if six.PY3 else 'print "foo"'
        assert init == expected
Example #24
def save_data_4_nn_k_words(dataset, path2OutDir, k_words= -1, tf_idf_vectorizer = None, data_name = "cnn"):
    if os.path.isdir(path2OutDir) is False:
        os.makedirs(path2OutDir)
    if os.path.isdir(path2OutDir + "/content") is False:
        os.makedirs(path2OutDir + "/content")
    if os.path.isdir(path2OutDir + "/summary") is False:
        os.makedirs(path2OutDir + "/summary")

    fo_content = io.open(path2OutDir + "/content/{0}words_{1}line.content".format(k_words,len(dataset)), "w", encoding='utf8')
    fo_sum = io.open(path2OutDir + "/summary/{0}words_{1}line.summary".format(k_words,len(dataset)), "w", encoding='utf8')
    if k_words != -1:
        if tf_idf_vectorizer is None:
            with open("exsum/tf_idf_vectorizer_100_01.pickle", mode="rb") as f:
                tf_idf_vectorizer = pickle.load(f)
        print "Saving ", k_words, " sent ..."
        progress_bar = ProgressBar(len(dataset))
        for filename, content, highlights in dataset:
            selected_sents = select_k_words(content,tf_idf_vectorizer, k_words)
            highlights = filter_sent(highlights)
            fo_content.write(" ".join(selected_sents).replace("\n"," ") + u"\n")
            fo_sum.write(" ".join(highlights).replace("\n"," ") + u"\n")
            progress_bar.Increment()
    else:
        print "Saving all sent ..."
        progress_bar = ProgressBar(len(dataset))
        for filename, content, highlights in dataset:
            highlights = filter_sent(highlights)
            fo_content.write(" ".join(content).replace("\n"," ") + "\n")
            fo_sum.write(" ".join(highlights).replace("\n"," ") + "\n")
            progress_bar.Increment()
    fo_content.close()
    fo_sum.close()
def process_output_file(output_dict, output_filename):
    import json
    import datetime
    import io
    import sys
    
    global total_processed
    global first_sleep
    
    if 'errors' not in output_dict:
        if first_sleep:
            with io.open(output_filename, 'w', encoding='utf-8') as f:
                first_sleep = False
                for tweet in output_dict:
                    f.write(unicode(json.dumps(tweet, ensure_ascii=False)))
                    f.write(u"\n")
                    total_processed+=1
        else:
            with io.open(output_filename, 'a', encoding='utf-8') as f:
                for tweet in output_dict:
                    f.write(unicode(json.dumps(tweet, ensure_ascii=False)))
                    f.write(u"\n")
                    total_processed+=1
                
        timenow     = datetime.datetime.today().strftime("%H:%M:%S")
        msg = "%s processed at %s, rows %d"%(output_filename, timenow, total_processed)
        print msg
        logging.info(msg)
        sys.stdout.flush()
    output_dict = []
Example #26
    def test_main(self):
        encoding = sys.getfilesystemencoding()
        # sys.stdout = StringIO()
        
        sys.stdout = io.BytesIO()
        sys.stdin = io.BytesIO(b'hello world')
        sys.stdin.buffer = sys.stdin
        _main([sys.argv[0], '-t', 'zh-CN'])
        self.assertEqual(u'你好世界\n'.encode(encoding), sys.stdout.getvalue())
        
        sys.stdout = io.BytesIO()
        sys.stdin = io.BytesIO(u'你好'.encode(encoding))
        sys.stdin.buffer = sys.stdin
        _main([sys.argv[0], '-t', 'en'])
        self.assertEqual(u'Hello\n'.encode(encoding), sys.stdout.getvalue())
        
        sys.stdout = io.BytesIO()
        sys.stdin = io.BytesIO(b'hello world')
        sys.stdin.buffer = sys.stdin        
        _main([sys.argv[0], '-t', 'zh-CN', '-o', 'utf-8'])
        self.assertEqual(u'你好世界\n'.encode('utf-8'), sys.stdout.getvalue())
        
        sys.stdout = io.BytesIO()        
        sys.stdin = io.BytesIO(u'你好'.encode('utf-8'))
        sys.stdin.buffer = sys.stdin                
        _main([sys.argv[0], '-t', 'en', '-i', 'utf-8'])
        self.assertEqual(u'Hello\n'.encode(encoding), sys.stdout.getvalue())
        
        sys.stdout = io.BytesIO()        
        with open('for_test.tmp', 'w') as f:
            f.write('hello world')
        _main([sys.argv[0], '-t', 'zh-CN', f.name])
        self.assertEqual(u'你好世界\n'.encode(encoding), sys.stdout.getvalue())
        
        sys.stdout = io.BytesIO()        
        with open('for_test.tmp', 'w') as f:
            f.write('hello world')
        _main([sys.argv[0], '-t', 'zh-CN', '-o', 'utf-8', f.name])
        self.assertEqual(u'你好世界\n'.encode('utf-8'), sys.stdout.getvalue())
        
        sys.stdout = io.BytesIO()
        with io.open('for_test.tmp', 'w', encoding=encoding) as f:
            f.write(u'你好')
        _main([sys.argv[0], '-t', 'en', f.name])
        self.assertEqual(u'Hello\n'.encode(encoding), sys.stdout.getvalue())
        
        sys.stdout = io.BytesIO()
        with io.open('for_test.tmp', 'w', encoding='utf-8') as f:
            f.write(u'你好')
        _main([sys.argv[0], '-t', 'en', '-i', 'utf-8', f.name])
        self.assertEqual(u'Hello\n'.encode(encoding), sys.stdout.getvalue())

        sys.stdout = io.BytesIO()        
        with io.open('for_test.tmp', 'w', encoding='utf-8') as f:
            f.write(u'你好')
        with io.open('for_test_2.tmp', 'w', encoding='utf-8') as f2:
            f2.write(u'世界')
            
        _main([sys.argv[0], '-t', 'en', '-i', 'utf-8', f.name, f2.name])
        self.assertEqual(u'Hello\nWorld\n'.encode(encoding), sys.stdout.getvalue())
Example #27
 def _get_redirects(self):
     """Determine which i/o streams to attach."""
     input_streams, output_streams = [], []
     # explicit redirects
     # input redirects
     infile_params = self.get('input').split(u':')
     if infile_params[0].upper() in (u'STDIO', u'STDIN'):
         if u'RAW' in (_x.upper() for _x in infile_params):
             input_streams.append(stdin.buffer)
         else:
             input_streams.append(stdin)
     else:
         if len(infile_params) > 1 and infile_params[0].upper() == u'FILE':
             infile = infile_params[1]
         else:
             infile = infile_params[0]
         if infile:
             try:
                 input_streams.append(io.open(infile, 'rb'))
             except EnvironmentError as e:
                 logging.warning(u'Could not open input file %s: %s', infile, e.strerror)
     # output redirects
     outfile_params = self.get('output').split(u':')
     if outfile_params[0].upper() in (u'STDIO', u'STDOUT'):
         if u'RAW' in (_x.upper() for _x in outfile_params):
             output_streams.append(stdout.buffer)
         else:
             output_streams.append(stdout)
     else:
         if len(outfile_params) > 1 and outfile_params[0].upper() == u'FILE':
             outfile_params = outfile_params[1:]
         outfile = outfile_params[0]
         append = len(outfile_params) > 1 and outfile_params[1].lower() == u'append'
         if outfile:
             try:
                 output_streams.append(io.open(outfile, 'ab' if append else 'wb'))
             except EnvironmentError as e:
                 logging.warning(u'Could not open output file %s: %s', outfile, e.strerror)
     # implicit stdio redirects
     # add stdio if redirected or no interface
     if stdin not in input_streams and stdin.buffer not in input_streams:
         if IS_CONSOLE_APP and not stdin.isatty():
             # redirected on console; use bytes stream
             input_streams.append(stdin.buffer)
         elif IS_CONSOLE_APP and not self.interface:
             # no interface & on console; use unicode stream
             input_streams.append(stdin)
     # redirect output as well if input is redirected, but not the other way around
     # this is because (1) GW-BASIC does this from the DOS prompt
     # (2) otherwise we don't see anything - we quit after input closes
     # isatty is also false if we run as a GUI exe, so check that here
     if stdout not in output_streams and stdout.buffer not in output_streams:
         if IS_CONSOLE_APP and (not stdout.isatty() or not stdin.isatty()):
             output_streams.append(stdout.buffer)
         elif IS_CONSOLE_APP and not self.interface:
             output_streams.append(stdout)
     return {
         'output_streams': output_streams,
         'input_streams': input_streams,
     }
Example #28
 def file(self, opts, filename, encoding):
     """Return a context manager for writing to.
     
     If you set encoding to "binary" or False, the file is opened in binary
     mode and you should encode the data you write yourself.
     
     """
     if not filename or filename == '-':
         filename, mode = sys.stdout.fileno(), 'w'
     else:
         if filename not in self._seen_filenames:
             self._seen_filenames.add(filename)
             if opts.backup_suffix and os.path.exists(filename):
                 shutil.copy(filename, filename + opts.backup_suffix)
             mode = 'w'
         else:
             mode = 'a'
     if encoding in (False, "binary"):
         f = io.open(filename, mode + 'b')
     else:
         f = io.open(filename, mode, encoding=encoding)
     try:
         yield f
     finally:
         f.close()
Example #29
    def do_apply(self):
        if not self.netconf.locked("dhcp") or self.netconf.ip4_changed:
            if self.netconf.ip4_changed:
                self.do_remove()

            dhcp_config, existing_subnet = self._read_dhcp_config()

            subnet = self._generate_subnet_config()

            # if subnet != self.existing_subnet:
            f = open(DHCP_CONFIG_FILE, "w")
            f.write(dhcp_config)
            f.write(subnet)
            f.close()

            cmd = [have("dhcpd3") or have("dhcpd"), "-pf", "/var/run/dhcpd-server/dhcp.pan1.pid", "pan1"]
            p = Popen(cmd)

            ret = p.wait()

            if ret == 0:
                dprint("dhcpd started correctly")
                f = open("/var/run/dhcp-server/dhcpd.pan1.pid", "r")
                self.pid = int(f.read())
                f.close()
                dprint("pid", self.pid)
                self.netconf.lock("dhcp")
            else:
                raise Exception("dhcpd failed to start. Check the system log for errors")
Example #30
 def test_close_flushes(self):
     f = io.open(test_support.TESTFN, "wb")
     f.write(b"xxx")
     f.close()
     f = io.open(test_support.TESTFN, "rb")
     self.assertEqual(f.read(), b"xxx")
     f.close()
Example #31
		print("You could try running under WSL.", file=sys.stderr)
	sys.exit(1)

def dirty():
	for extra in ([], ['--cached'],):
		cmd = ['git', 'diff-index', '--quiet', 'HEAD'] + extra
		try:
			check_call(cmd)
		except CalledProcessError as e:
			if e.returncode == 1:
				return '.dirty'
			else:
				raise
	return ''

with open('README.md', 'r', encoding='utf-8') as fh:
	long_description = []
	for line in fh:
		if not line.startswith('[PyPI]') and not line.startswith('`pip'):
			long_description.append(line)
	long_description = ''.join(long_description)

if exists('PKG-INFO'):
	with open('PKG-INFO', 'r', encoding='utf-8') as fh:
		for line in fh:
			if line.startswith('Version: '):
				version = line.strip().split()[1]
				break
else:
	version = datetime.utcnow().strftime('%Y.%m.%d')
	env_version = os.environ.get('ACCELERATOR_BUILD_VERSION')
Example #32
 def from_indexpath(cls, indexpath):
     with io.open(indexpath, 'rb') as file:
         return pickle.load(file)
Example #33
def write_pattern_file(template_rendered):
    with io.open(os.path.join(DIR, 'pattern.py'), mode='w',
                 encoding='utf-8') as fh:
        fh.write(template_rendered)
Example #34
def read_template():
    with io.open(os.path.join(DIR, 'pattern_template.py'),
                 mode='r',
                 encoding='utf-8') as fh:
        return fh.read()
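# Usage sketch (not part of the original snippets): the two helpers above are presumably used
# together; a str.format-style {pattern_name} placeholder in the template is assumed here,
# and the real project may render it differently.
template = read_template()
rendered = template.format(pattern_name="example")
write_pattern_file(rendered)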
Example #35
import folium
import pandas, io
data = pandas.read_csv("Volcanoes.txt")
data_json = io.open('world.json', 'r', encoding='utf-8-sig').read()
map = folium.Map(location=[38.2, -99.1], zoom_start=6, tiles="Mapbox Bright")
fgv = folium.FeatureGroup(name="Volcanoes")
lat = list(data['LAT'])
lon = list(data['LON'])
elev = list(data['ELEV'])


def color_producer(elevation):
    if elevation < 1000:
        return 'green'
    elif 1000 <= elevation < 3000:
        return 'orange'
    else:
        return 'red'


for i, j, k in zip(lat, lon, elev):
    fgv.add_child(
        folium.CircleMarker(location=[i, j],
                            radius=6,
                            popup=k,
                            fill_color=color_producer(k),
                            color='grey',
                            fill_opacity=0.7))

#fg.add_child(folium.GeoJson(data=open('world.json', 'r', encoding='utf-8-sig'), style_function=lambda x: {'fillColor':'yellow'}))
fgp = folium.FeatureGroup(name="Population")
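# Hedged completion (not part of the original snippet): attach the GeoJSON string already read
# into data_json to the population group, add both layers plus a layer control, and save the map;
# the output filename is a placeholder.
fgp.add_child(folium.GeoJson(data=data_json,
                             style_function=lambda x: {'fillColor': 'yellow'}))
map.add_child(fgv)
map.add_child(fgp)
map.add_child(folium.LayerControl())
map.save("Map1.html")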
Example #36
 def __call__(self, url, filename=None):
     self.fetched = url
     fn = url_to_filename(url)
     with open(fn, 'r', encoding="utf8") as f:
         content = f.read()
     return content
Example #37
        pass

    def run(self):
        rcfile = os.path.abspath('.pylintrc')
        standaloneModules = [m for m in []]
        cli_options = ['-E'] if self.errorsonly else []
        cli_options.append('--output-format={0}'.format(self.format))
        errno = subprocess.call([sys.executable, '-m', 'pylint', '--rcfile={}'.format(rcfile), '--output-format=colorized'] +
                                cli_options + ['pychroot'] + standaloneModules)
        raise SystemExit(errno)

test_requirements = ['pytest']
if sys.hexversion < 0x03030000:
    test_requirements.append('mock')

with open('README.rst', 'r', encoding='utf-8') as f:
    readme = f.read()
with open('NEWS.rst', 'r', encoding='utf-8') as f:
    news = f.read()

setup(
    name='pychroot',
    version=pkgdist.version(),
    description='a python library and cli tool that simplify chroot handling',
    long_description=readme + '\n\n' + news,
    author='Tim Harder',
    author_email='*****@*****.**',
    url='https://github.com/pkgcore/pychroot',
    license='BSD',
    packages=find_packages(),
    scripts=os.listdir('bin'),
Example #38
def get_from_cache(url, cache_dir=None, force_download=False, proxies=None, etag_timeout=10, resume_download=False):
    """
    Given a URL, look for the corresponding dataset in the local cache.
    If it's not there, download it. Then return the path to the cached file.
    """
    if cache_dir is None:
        cache_dir = TRANSFORMERS_CACHE
    if sys.version_info[0] == 3 and isinstance(cache_dir, Path):
        cache_dir = str(cache_dir)
    if sys.version_info[0] == 2 and not isinstance(cache_dir, str):
        cache_dir = str(cache_dir)

    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)

    # Get eTag to add to filename, if it exists.
    if url.startswith("s3://"):
        etag = s3_etag(url, proxies=proxies)
    else:
        try:
            response = requests.head(
                url, allow_redirects=True, proxies=proxies, timeout=etag_timeout)
            if response.status_code != 200:
                etag = None
            else:
                etag = response.headers.get("ETag")
        except (EnvironmentError, requests.exceptions.Timeout):
            etag = None

    if sys.version_info[0] == 2 and etag is not None:
        etag = etag.decode('utf-8')
    filename = url_to_filename(url, etag)

    # get cache path to put the file
    cache_path = os.path.join(cache_dir, filename)

    # If we don't have a connection (etag is None) and can't identify the file
    # try to get the last downloaded one
    if not os.path.exists(cache_path) and etag is None:
        matching_files = fnmatch.filter(os.listdir(cache_dir), filename + '.*')
        matching_files = list(
            filter(lambda s: not s.endswith('.json'), matching_files))
        if matching_files:
            cache_path = os.path.join(cache_dir, matching_files[-1])

    if resume_download:
        incomplete_path = cache_path + '.incomplete'

        @contextmanager
        def _resumable_file_manager():
            with open(incomplete_path, 'a+b') as f:
                yield f
            os.remove(incomplete_path)
        temp_file_manager = _resumable_file_manager
        if os.path.exists(incomplete_path):
            resume_size = os.stat(incomplete_path).st_size
        else:
            resume_size = 0
    else:
        temp_file_manager = tempfile.NamedTemporaryFile
        resume_size = 0

    if not os.path.exists(cache_path) or force_download:
        # Download to temporary file, then copy to cache dir once finished.
        # Otherwise you get corrupt cache entries if the download gets interrupted.
        with temp_file_manager() as temp_file:
            logger.info(
                "%s not found in cache or force_download set to True, downloading to %s", url, temp_file.name)

            # GET file object
            if url.startswith("s3://"):
                if resume_download:
                    logger.warn(
                        'Warning: resumable downloads are not implemented for "s3://" urls')
                s3_get(url, temp_file, proxies=proxies)
            else:
                http_get(url, temp_file, proxies=proxies,
                         resume_size=resume_size)

            # we are copying the file before closing it, so flush to avoid truncation
            temp_file.flush()
            # shutil.copyfileobj() starts at the current position, so go to the start
            temp_file.seek(0)

            logger.info("copying %s to cache at %s",
                        temp_file.name, cache_path)
            with open(cache_path, 'wb') as cache_file:
                shutil.copyfileobj(temp_file, cache_file)

            logger.info("creating metadata file for %s", cache_path)
            meta = {'url': url, 'etag': etag}
            meta_path = cache_path + '.json'
            with open(meta_path, 'w') as meta_file:
                output_string = json.dumps(meta)
                if sys.version_info[0] == 2 and isinstance(output_string, str):
                    # The beauty of python 2
                    output_string = unicode(output_string, 'utf-8')
                meta_file.write(output_string)

            logger.info("removing temp file %s", temp_file.name)

    return cache_path
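# Usage sketch (not part of the original snippet): the URL is a placeholder and the call assumes
# the module-level helpers used above (TRANSFORMERS_CACHE, url_to_filename, s3_etag, s3_get,
# http_get, logger) are defined as in the original module.
local_path = get_from_cache("https://example.com/models/vocab.txt")
print(local_path)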
Example #39
    """

    description = "do nothing"
    user_options = []

    def initialize_options(self):
        pass

    def finalize_options(self):
        pass

    def run(self):
        pass


with open("README.rst", "r", encoding="utf-8") as readme:
    long_description = readme.read()


setup(
    name='zopfli',
    version='0.1.4',
    author='Adam DePrince',
    author_email='*****@*****.**',
    maintainer='Cosimo Lupo',
    maintainer_email='*****@*****.**',
    description='Zopfli module for python',
    long_description=long_description,
    ext_modules=[
        Extension('zopfli.zopfli',
            sources=[
Example #40
 def _resumable_file_manager():
     with open(incomplete_path, 'a+b') as f:
         yield f
     os.remove(incomplete_path)
Example #41
# What packages are optional?
EXTRAS = {
    # "fancy feature": ["django"],
}

# The rest you shouldn"t have to touch too much :)
# ------------------------------------------------
# Except, perhaps the License and Trove Classifiers!
# If you do change the License, remember to change the Trove Classifier for
# that!
HERE = os.path.abspath(os.path.dirname(__file__))

# Import the README and use it as the long-description.
# Note: this will only work if "README.md" is present in your MANIFEST.in file!
try:
    with io.open(os.path.join(HERE, "README.md"), encoding="utf-8") as f:
        LONG_DESCRIPTION = "\n" + f.read()
except FileNotFoundError:
    LONG_DESCRIPTION = DESCRIPTION

# Load the package"s __version__.py module as a dictionary.
ABOUT = {}
if not VERSION:
    PROJECT_SLUG = NAME.lower().replace("-", "_").replace(" ", "_")
    with open(os.path.join(HERE, PROJECT_SLUG, "__version__.py")) as f:
        exec(f.read(), ABOUT)  # pylint: disable=exec-used
else:
    ABOUT["__version__"] = VERSION


class UploadCommand(Command):
            'country_code', 'default', 'best', 'start_date', 'end_date',
            'overlay', 'available_projections', 'attribution', 'icon']:
        thing = properties.get(f)
        if thing is not None:
            converted[f] = thing

    for f in ['min_zoom', 'max_zoom']:
        thing = properties.get(f)
        if thing is not None:
            extent_obj[f] = thing

    if extent_obj:
        converted['extent'] = extent_obj

    return converted

parser = argparse.ArgumentParser(description='Generate legacy json output format from geojson format sources')
parser.add_argument('files', metavar='F', nargs='+', help='file(s) to process')
parser.add_argument('-b', dest='gen_bbox', action='store_true', help='generate bounding boxes from polygons')
parser.add_argument('-t', dest='tms_only', action='store_true', help='only include tile servers')
parser.add_argument('-r', dest='remove_polygons', action='store_true', help='remove polygons from output, typically used together with -b')

args = parser.parse_args()

features = []
for file in args.files:
    with io.open(file, 'r') as f:
        features.append(convert_json_source(args, json.load(f)))

print(json.dumps(features, sort_keys=True, separators=(',', ':'), ensure_ascii=False).encode('utf-8'))
Example #43
    def handle(self, **options):
        target = options.pop('directory')
        # First read the original contents
        if PY_VER[0] == '2':
            old_file = open('config/default.py')
        else:
            old_file = open('config/default.py', encoding='utf-8')

        # if some directory is given, make sure it's nicely expanded
        top_dir = path.abspath(path.expanduser(target))
        if not path.exists(top_dir):
            raise CommandError("Destination directory '%s' does not "
                               "exist, please init first." % top_dir)
        if not path.exists(path.join(top_dir, 'manage.py')):
            raise CommandError("Current directory '%s' is not "
                               "a django project dir, please init first. "
                               "(bk-admin init ${app_code})" % top_dir)

        base_subdir = 'weixin_template'

        append_file_tuple = (('', 'requirements.txt'), )

        # Setup a stub settings environment for template rendering
        if not settings.configured:
            settings.configure()
            django.setup()

        template_dir = path.join(blueapps.__path__[0], 'conf', base_subdir)
        run_ver = None
        conf_file = open(path.join(os.getcwd(), 'config', '__init__.py'))
        for line in conf_file.readlines():
            if line.startswith('RUN_VER'):
                run_ver = line[11:-2]
        conf_file.close()

        prefix_length = len(template_dir) + 1

        for root, dirs, files in os.walk(template_dir):

            relative_dir = root[prefix_length:]

            target_dir = path.join(top_dir, relative_dir)
            if not path.exists(target_dir):
                os.mkdir(target_dir)

            flag = root.endswith('sites')
            for dirname in dirs[:]:
                if (dirname.startswith('.') or  # noqa
                        dirname == '__pycache__' or  # noqa
                    (flag and dirname != run_ver)):
                    dirs.remove(dirname)

            for filename in files:
                if filename.endswith(('.pyo', '.pyc', '.py.class', '.json')):
                    # Ignore some files as they cause various breakages.
                    continue
                old_path = path.join(root, filename)
                new_path = path.join(top_dir, relative_dir, filename)
                for old_suffix, new_suffix in self.rewrite_template_suffixes:
                    if new_path.endswith(old_suffix):
                        new_path = new_path[:-len(old_suffix)] + new_suffix
                        break  # Only rewrite once

                with io.open(old_path, 'rb') as template_file:
                    content = template_file.read()
                w_mode = 'wb'
                for _root, _filename in append_file_tuple:
                    if _root == relative_dir and _filename == filename:
                        w_mode = 'ab'
                with io.open(new_path, w_mode) as new_file:
                    new_file.write(content)

                try:
                    shutil.copymode(old_path, new_path)
                    self.make_writeable(new_path)
                except OSError:
                    self.stderr.write(
                        "Notice: Couldn't set permission bits on %s. You're "
                        "probably using an uncommon filesystem setup. No "
                        "problem." % new_path, self.style.NOTICE)
        # Modify the default config file
        modify_default_file(old_file)
Example #44
from io import open
from setuptools import setup, find_packages

# from pip.req import parse_requirements

with open("requirements.txt") as f:
    install_requires = f.read().strip().split("\n")

setup(
    name="fast_bert",
    version="1.6.5",
    description="AI Library using BERT",
    author="Kaushal Trivedi",
    author_email="*****@*****.**",
    license="Apache2",
    url="https://github.com/kaushaltrivedi/fast-bert",
    long_description=open("README.md", "r", encoding="utf-8").read(),
    long_description_content_type="text/markdown",
    keywords="BERT NLP deep learning google",
    packages=find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]),
    install_requires=install_requires,
    classifiers=[
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: Apache Software License",
        "Programming Language :: Python :: 3",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],
    zip_safe=False,
)
 def instantiate_from(self, filename):
     datadir = os.environ.get('FHIR_UNITTEST_DATADIR') or ''
     with io.open(os.path.join(datadir, filename), 'r', encoding='utf-8') as handle:
         js = json.load(handle)
         self.assertEqual("ResearchDefinition", js["resourceType"])
     return researchdefinition.ResearchDefinition(js)
Example #46
		browser.get(song['url'])

		# Post a comment first
		while True:
			easy2hide_notice = browser.find_element_by_css_selector('.easy2hide_notice')
			if easy2hide_notice:
				time.sleep(10)
				now_time = datetime.datetime.now()
				now_time_str = datetime.datetime.strftime(now_time,'%Y%m%d_%H%M%S')
				comment = browser.find_element_by_css_selector('#comment')
				comment.send_keys(now_time_str)
				comment.submit()
			else:
				break

		indexFile = io.open(indexPath, 'a', encoding='utf-8')
		indexFile.write('\n')
		indexFile.close()

		imgELs = browser.find_elements_by_css_selector('.wp-caption.aligncenter a')
		# Iterate over the image links
		for imgEL in imgELs:
			# imgName = imgEL.get_attribute('title')  # image name
			imgURL = imgEL.get_attribute('href')
			imgPath = os.path.join(songPath, os.path.basename(imgURL))

			indexFile = io.open(indexPath, 'a', encoding='utf-8')
			indexFile.write(imgPath+'\n')
			indexFile.close()

			# check file
def read(filename):
    filename = os.path.join(os.path.dirname(__file__), filename)
    text_type = type(u"")
    with io.open(filename, mode="r", encoding='utf-8') as fd:
        return re.sub(text_type(r':[a-z]+:`~?(.*?)`'), text_type(r'``\1``'), fd.read())
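# Usage sketch (not part of the original snippet): the read() helper above is typically fed into
# setuptools; the file and package names here are placeholders.
from setuptools import setup

setup(name="example-package",
      long_description=read("README.rst"))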
catalog = '.'  # '/' - from the root, 'cat', '.' - in file's directory
patterns = {
    r'<=>': "$\\\\Leftrightarrow$",
    r'<=': "$\\\\Leftarrow$",
    r'=>': "$\\\\Rightarrow$",
    r'\|->': "\\\\mapsto ",
    r'->': '\\\\rightarrow ',
}
for (dirpath, dirnames, filenames) in os.walk(catalog):
    for filename in filenames:
        print('File under consideration', filename)
        name = re.search(r'\.(.+)$', filename)
        if name and name.group(1) == 'tex':
            import io
            with io.open(os.path.join(dirpath, filename),
                         'r',
                         encoding='utf-8',
                         errors='replace') as file:
                text = file.read()
            print('- recognized and open an applicable file')
            for pattern, replacement in patterns.items():
                pattern = re.compile(pattern, flags=0)
                textnew = re.sub(pattern, replacement, text)
                text = textnew
                print('replacement', str(pattern), replacement)
            print(text)
            file = open(os.path.join(dirpath, filename + '.re.tex'), 'wb')
            file.write(text.encode('utf8'))
            file.close()
        else:
            print('File {} doesn\'t match the criteria and is rejected'.format(
                filename))
def write_predictions(all_examples, all_features, all_results, n_best_size,
                      max_answer_length, do_lower_case, output_prediction_file,
                      output_nbest_file, output_null_log_odds_file,
                      verbose_logging, version_2_with_negative,
                      null_score_diff_threshold):
    """Write final predictions to the json file and log-odds of null if needed."""
    logger.info("Writing predictions to: %s" % (output_prediction_file))
    logger.info("Writing nbest to: %s" % (output_nbest_file))

    example_index_to_features = collections.defaultdict(list)
    for feature in all_features:
        example_index_to_features[feature.example_index].append(feature)

    unique_id_to_result = {}
    for result in all_results:
        unique_id_to_result[result.unique_id] = result

    _PrelimPrediction = collections.namedtuple(  # pylint: disable=invalid-name
        "PrelimPrediction", [
            "feature_index", "start_index", "end_index", "start_logit",
            "end_logit"
        ])

    all_predictions = collections.OrderedDict()
    all_nbest_json = collections.OrderedDict()
    scores_diff_json = collections.OrderedDict()

    for (example_index, example) in enumerate(all_examples):
        features = example_index_to_features[example_index]

        prelim_predictions = []
        # keep track of the minimum score of null start+end of position 0
        score_null = 1000000  # large and positive
        min_null_feature_index = 0  # the paragraph slice with min null score
        null_start_logit = 0  # the start logit at the slice with min null score
        null_end_logit = 0  # the end logit at the slice with min null score
        for (feature_index, feature) in enumerate(features):
            result = unique_id_to_result[feature.unique_id]
            start_indexes = _get_best_indexes(result.start_logits, n_best_size)
            end_indexes = _get_best_indexes(result.end_logits, n_best_size)
            # if we could have irrelevant answers, get the min score of irrelevant
            if version_2_with_negative:
                feature_null_score = result.start_logits[
                    0] + result.end_logits[0]
                if feature_null_score < score_null:
                    score_null = feature_null_score
                    min_null_feature_index = feature_index
                    null_start_logit = result.start_logits[0]
                    null_end_logit = result.end_logits[0]
            for start_index in start_indexes:
                for end_index in end_indexes:
                    # We could hypothetically create invalid predictions, e.g., predict
                    # that the start of the span is in the question. We throw out all
                    # invalid predictions.
                    if start_index >= len(feature.tokens):
                        continue
                    if end_index >= len(feature.tokens):
                        continue
                    if start_index not in feature.token_to_orig_map:
                        continue
                    if end_index not in feature.token_to_orig_map:
                        continue
                    if not feature.token_is_max_context.get(
                            start_index, False):
                        continue
                    if end_index < start_index:
                        continue
                    length = end_index - start_index + 1
                    if length > max_answer_length:
                        continue
                    prelim_predictions.append(
                        _PrelimPrediction(
                            feature_index=feature_index,
                            start_index=start_index,
                            end_index=end_index,
                            start_logit=result.start_logits[start_index],
                            end_logit=result.end_logits[end_index]))
        if version_2_with_negative:
            prelim_predictions.append(
                _PrelimPrediction(
                    feature_index=min_null_feature_index,
                    start_index=0,
                    end_index=0,
                    start_logit=null_start_logit,
                    end_logit=null_end_logit))
        prelim_predictions = sorted(
            prelim_predictions,
            key=lambda x: (x.start_logit + x.end_logit),
            reverse=True)

        _NbestPrediction = collections.namedtuple(  # pylint: disable=invalid-name
            "NbestPrediction", ["text", "start_logit", "end_logit"])

        seen_predictions = {}
        nbest = []
        for pred in prelim_predictions:
            if len(nbest) >= n_best_size:
                break
            feature = features[pred.feature_index]
            if pred.start_index > 0:  # this is a non-null prediction
                tok_tokens = feature.tokens[pred.start_index:(
                    pred.end_index + 1)]
                orig_doc_start = feature.token_to_orig_map[pred.start_index]
                orig_doc_end = feature.token_to_orig_map[pred.end_index]
                orig_tokens = example.doc_tokens[orig_doc_start:(
                    orig_doc_end + 1)]
                tok_text = " ".join(tok_tokens)

                # De-tokenize WordPieces that have been split off.
                tok_text = tok_text.replace(" ##", "")
                tok_text = tok_text.replace("##", "")

                # Clean whitespace
                tok_text = tok_text.strip()
                tok_text = " ".join(tok_text.split())
                orig_text = " ".join(orig_tokens)

                final_text = get_final_text(tok_text, orig_text, do_lower_case,
                                            verbose_logging)
                if final_text in seen_predictions:
                    continue

                seen_predictions[final_text] = True
            else:
                final_text = ""
                seen_predictions[final_text] = True

            nbest.append(
                _NbestPrediction(
                    text=final_text,
                    start_logit=pred.start_logit,
                    end_logit=pred.end_logit))
        # if we didn't include the empty option in the n-best, include it
        if version_2_with_negative:
            if "" not in seen_predictions:
                nbest.append(
                    _NbestPrediction(
                        text="",
                        start_logit=null_start_logit,
                        end_logit=null_end_logit))

            # In very rare edge cases we could only have single null prediction.
            # So we just create a nonce prediction in this case to avoid failure.
            if len(nbest) == 1:
                nbest.insert(
                    0,
                    _NbestPrediction(
                        text="empty", start_logit=0.0, end_logit=0.0))

        # In very rare edge cases we could have no valid predictions. So we
        # just create a nonce prediction in this case to avoid failure.
        if not nbest:
            nbest.append(
                _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))

        assert len(nbest) >= 1

        total_scores = []
        best_non_null_entry = None
        for entry in nbest:
            total_scores.append(entry.start_logit + entry.end_logit)
            if not best_non_null_entry:
                if entry.text:
                    best_non_null_entry = entry

        probs = _compute_softmax(total_scores)

        nbest_json = []
        for (i, entry) in enumerate(nbest):
            output = collections.OrderedDict()
            output["text"] = entry.text
            output["probability"] = probs[i]
            output["start_logit"] = entry.start_logit
            output["end_logit"] = entry.end_logit
            nbest_json.append(output)

        assert len(nbest_json) >= 1

        if not version_2_with_negative:
            all_predictions[example.qas_id] = nbest_json[0]["text"]
        else:
            # predict "" iff the null score - the score of best non-null > threshold
            score_diff = score_null - best_non_null_entry.start_logit - (
                best_non_null_entry.end_logit)
            scores_diff_json[example.qas_id] = score_diff
            if score_diff > null_score_diff_threshold:
                all_predictions[example.qas_id] = ""
            else:
                all_predictions[example.qas_id] = best_non_null_entry.text
        all_nbest_json[example.qas_id] = nbest_json

    with open(output_prediction_file, "w") as writer:
        writer.write(json.dumps(all_predictions, indent=4) + "\n")

    with open(output_nbest_file, "w") as writer:
        writer.write(json.dumps(all_nbest_json, indent=4) + "\n")

    if version_2_with_negative:
        with open(output_null_log_odds_file, "w") as writer:
            writer.write(json.dumps(scores_diff_json, indent=4) + "\n")

    return all_predictions
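# --- Hedged sketch (not part of the original source) ------------------------
# write_predictions above (and write_predictions_extended further below) call
# a _compute_softmax helper that is not included in this excerpt.  A minimal,
# numerically stable version would look like the following; the original
# helper may differ in detail.
import math


def _compute_softmax(scores):
    """Compute softmax probabilities for a list of raw scores."""
    if not scores:
        return []
    max_score = max(scores)  # subtract the max for numerical stability
    exp_scores = [math.exp(s - max_score) for s in scores]
    total = sum(exp_scores)
    return [e / total for e in exp_scores]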
from flask import Flask
from flask import request
from flask_cors import CORS

import numpy as np
import random
import io
import pickle

# NOTE: the keras import is not shown in the original excerpt; tf.keras is
# assumed here so that keras.models.load_model below resolves.
from tensorflow import keras

app = Flask(__name__)
CORS(app)

model_1 = keras.models.load_model('/Users/kushalvajrala/ProgrammingProjects/DrakeLyricsGenerator/DrakeGenerator/model1')

path = '/Users/kushalvajrala/ProgrammingProjects/DrakeLyricsGenerator/DrakeGenerator/drake_dataset/drake_lyrics.txt'
with io.open(path, encoding="utf-8") as f:
    text = f.read().lower()
text = text.replace("\n", " ")  # remove newline chars for nicer display

chars = sorted(list(set(text)))
#char_indices = dict((c, i) for i, c in enumerate(chars))
#indices_char = dict((i, c) for i, c in enumerate(chars))
diversity = 0.5



file_name1 = "chari"
file_name2 = "ichar"
outfile1 = open(file_name1, "rb")
outfile2 = open(file_name2, "rb")
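# --- Hedged sketch (not part of the original snippet) -----------------------
# The code above loads a trained char-level model, the lyrics corpus, and the
# char/index mappings, but the excerpt stops before the serving logic.  The
# sketch below assumes the standard Keras char-RNN setup: one-hot windows of
# length `maxlen` and temperature ("diversity") sampling.  `maxlen`, the
# sampling helper, and the /generate route are assumptions, not original code.

maxlen = 40  # assumed window length used when model_1 was trained


def sample(preds, temperature=1.0):
    """Sample a character index from the model's output distribution."""
    preds = np.asarray(preds).astype("float64")
    preds = np.log(preds + 1e-8) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return int(np.argmax(probas))


@app.route("/generate")
def generate():
    """Generate lyrics starting from a random seed window of the corpus."""
    length = int(request.args.get("length", 400))
    start = random.randint(0, len(text) - maxlen - 1)
    sentence = text[start:start + maxlen]
    generated = ""
    for _ in range(length):
        x_pred = np.zeros((1, maxlen, len(chars)))
        for t, char in enumerate(sentence):
            x_pred[0, t, char_indices[char]] = 1.0
        preds = model_1.predict(x_pred, verbose=0)[0]
        next_char = indices_char[sample(preds, diversity)]
        generated += next_char
        sentence = sentence[1:] + next_char
    return {"seed": text[start:start + maxlen], "lyrics": generated}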
Beispiel #51
0
# Imports assumed from the full setup.py (its top is truncated in this excerpt):
import io
import os

import setuptools

# 'Development Status :: 4 - Beta'
# 'Development Status :: 5 - Production/Stable'
release_status = 'Development Status :: 5 - Production/Stable'
dependencies = [
    'google-api-core[grpc] >= 1.4.1, < 2.0.0dev',
]
extras = {
}


# Setup boilerplate below this line.

package_root = os.path.abspath(os.path.dirname(__file__))

readme_filename = os.path.join(package_root, 'README.rst')
with io.open(readme_filename, encoding='utf-8') as readme_file:
    readme = readme_file.read()

# Only include packages under the 'google' namespace. Do not include tests,
# benchmarks, etc.
packages = [
    package for package in setuptools.find_packages()
    if package.startswith('google')]

# Determine which namespaces are needed.
namespaces = ['google']
if 'google.cloud' in packages:
    namespaces.append('google.cloud')


setuptools.setup(
Beispiel #52
0
def write_predictions_extended(
        all_examples, all_features, all_results, n_best_size,
        max_answer_length, output_prediction_file, output_nbest_file,
        output_null_log_odds_file, orig_data_file, start_n_top, end_n_top,
        version_2_with_negative, tokenizer, verbose_logging):
    """ XLNet write prediction logic (more complex than Bert's).
        Write final predictions to the json file and log-odds of null if needed.
        Requires utils_squad_evaluate.py
    """
    _PrelimPrediction = collections.namedtuple(  # pylint: disable=invalid-name
        "PrelimPrediction", [
            "feature_index", "start_index", "end_index", "start_log_prob",
            "end_log_prob"
        ])

    _NbestPrediction = collections.namedtuple(  # pylint: disable=invalid-name
        "NbestPrediction", ["text", "start_log_prob", "end_log_prob"])

    logger.info("Writing predictions to: %s", output_prediction_file)
    # logger.info("Writing nbest to: %s" % (output_nbest_file))

    example_index_to_features = collections.defaultdict(list)
    for feature in all_features:
        example_index_to_features[feature.example_index].append(feature)

    unique_id_to_result = {}
    for result in all_results:
        unique_id_to_result[result.unique_id] = result

    all_predictions = collections.OrderedDict()
    all_nbest_json = collections.OrderedDict()
    scores_diff_json = collections.OrderedDict()

    for (example_index, example) in enumerate(all_examples):
        features = example_index_to_features[example_index]

        prelim_predictions = []
        # keep track of the minimum score of null start+end of position 0
        score_null = 1000000  # large and positive

        for (feature_index, feature) in enumerate(features):
            result = unique_id_to_result[feature.unique_id]

            cur_null_score = result.cls_logits

            # if we could have irrelevant answers, get the min score of irrelevant
            score_null = min(score_null, cur_null_score)

            for i in range(start_n_top):
                for j in range(end_n_top):
                    start_log_prob = result.start_top_log_probs[i]
                    start_index = result.start_top_index[i]

                    j_index = i * end_n_top + j

                    end_log_prob = result.end_top_log_probs[j_index]
                    end_index = result.end_top_index[j_index]

                    # We could hypothetically create invalid predictions, e.g., predict
                    # that the start of the span is in the question. We throw out all
                    # invalid predictions.
                    if start_index >= feature.paragraph_len - 1:
                        continue
                    if end_index >= feature.paragraph_len - 1:
                        continue

                    if not feature.token_is_max_context.get(
                            start_index, False):
                        continue
                    if end_index < start_index:
                        continue
                    length = end_index - start_index + 1
                    if length > max_answer_length:
                        continue

                    prelim_predictions.append(
                        _PrelimPrediction(
                            feature_index=feature_index,
                            start_index=start_index,
                            end_index=end_index,
                            start_log_prob=start_log_prob,
                            end_log_prob=end_log_prob))

        prelim_predictions = sorted(
            prelim_predictions,
            key=lambda x: (x.start_log_prob + x.end_log_prob),
            reverse=True)

        seen_predictions = {}
        nbest = []
        for pred in prelim_predictions:
            if len(nbest) >= n_best_size:
                break
            feature = features[pred.feature_index]

            # XLNet un-tokenizer
            # Let's keep it simple for now and see if we need all this later.
            #
            # tok_start_to_orig_index = feature.tok_start_to_orig_index
            # tok_end_to_orig_index = feature.tok_end_to_orig_index
            # start_orig_pos = tok_start_to_orig_index[pred.start_index]
            # end_orig_pos = tok_end_to_orig_index[pred.end_index]
            # paragraph_text = example.paragraph_text
            # final_text = paragraph_text[start_orig_pos: end_orig_pos + 1].strip()

            # Previously used Bert untokenizer
            tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)]
            orig_doc_start = feature.token_to_orig_map[pred.start_index]
            orig_doc_end = feature.token_to_orig_map[pred.end_index]
            orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)]
            tok_text = tokenizer.convert_tokens_to_string(tok_tokens)

            # Clean whitespace
            tok_text = tok_text.strip()
            tok_text = " ".join(tok_text.split())
            orig_text = " ".join(orig_tokens)

            final_text = get_final_text(
                tok_text, orig_text, tokenizer.do_lower_case, verbose_logging)

            if final_text in seen_predictions:
                continue

            seen_predictions[final_text] = True

            nbest.append(
                _NbestPrediction(
                    text=final_text,
                    start_log_prob=pred.start_log_prob,
                    end_log_prob=pred.end_log_prob))

        # In very rare edge cases we could have no valid predictions. So we
        # just create a nonce prediction in this case to avoid failure.
        if not nbest:
            nbest.append(
                _NbestPrediction(
                    text="", start_log_prob=-1e6, end_log_prob=-1e6))

        total_scores = []
        best_non_null_entry = None
        for entry in nbest:
            total_scores.append(entry.start_log_prob + entry.end_log_prob)
            if not best_non_null_entry:
                best_non_null_entry = entry

        probs = _compute_softmax(total_scores)

        nbest_json = []
        for (i, entry) in enumerate(nbest):
            output = collections.OrderedDict()
            output["text"] = entry.text
            output["probability"] = probs[i]
            output["start_log_prob"] = entry.start_log_prob
            output["end_log_prob"] = entry.end_log_prob
            nbest_json.append(output)

        assert len(nbest_json) >= 1
        assert best_non_null_entry is not None

        score_diff = score_null
        scores_diff_json[example.qas_id] = score_diff
        # note(zhiliny): always predict best_non_null_entry
        # and the evaluation script will search for the best threshold
        all_predictions[example.qas_id] = best_non_null_entry.text

        all_nbest_json[example.qas_id] = nbest_json

    with open(output_prediction_file, "w") as writer:
        writer.write(json.dumps(all_predictions, indent=4) + "\n")

    with open(output_nbest_file, "w") as writer:
        writer.write(json.dumps(all_nbest_json, indent=4) + "\n")

    if version_2_with_negative:
        with open(output_null_log_odds_file, "w") as writer:
            writer.write(json.dumps(scores_diff_json, indent=4) + "\n")

    with open(orig_data_file, "r", encoding='utf-8') as reader:
        orig_data = json.load(reader)["data"]

    qid_to_has_ans = make_qid_to_has_ans(orig_data)
    has_ans_qids = [k for k, v in qid_to_has_ans.items() if v]
    no_ans_qids = [k for k, v in qid_to_has_ans.items() if not v]
    exact_raw, f1_raw = get_raw_scores(orig_data, all_predictions)
    out_eval = {}

    find_all_best_thresh_v2(out_eval, all_predictions, exact_raw, f1_raw,
                            scores_diff_json, qid_to_has_ans)

    return out_eval
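# --- Hedged sketch (not part of the original source) ------------------------
# write_predictions_extended always keeps the best non-null answer and writes
# the null score (cls_logits) to scores_diff_json; find_all_best_thresh_v2 is
# expected to pick the threshold.  Conceptually, applying a chosen threshold
# afterwards looks like this (helper name and signature are illustrative):


def apply_null_threshold(all_predictions, scores_diff_json, threshold):
    """Blank out answers whose null score exceeds the chosen threshold."""
    thresholded = {}
    for qas_id, answer in all_predictions.items():
        if scores_diff_json.get(qas_id, 0.0) > threshold:
            thresholded[qas_id] = ""  # the model prefers "no answer"
        else:
            thresholded[qas_id] = answer
    return thresholded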
Beispiel #53
0
    update_line_numbers(filename)

    context = Context(
        mutation_id=mutation_id,
        filename=filename,
        dict_synonyms=dict_synonyms,
    )
    mutate_file(
        backup=backup,
        context=context,
    )
    if context.number_of_performed_mutations == 0:
        raise RuntimeError('No mutations performed.')


null_out = open(os.devnull, 'w')


class Config(object):
    def __init__(self, swallow_output, test_command, exclude_callback,
                 baseline_time_elapsed, test_time_multiplier, test_time_base,
                 backup, dict_synonyms, total, using_testmon, cache_only,
                 tests_dirs, hash_of_tests, pre_mutation, post_mutation):
        self.swallow_output = swallow_output
        self.test_command = test_command
        self.exclude_callback = exclude_callback
        self.baseline_time_elapsed = baseline_time_elapsed
        self.test_time_multiplier = test_time_multiplier
        self.test_time_base = test_time_base
        self.backup = backup
        self.dict_synonyms = dict_synonyms
Beispiel #54
0
def read_squad_examples(input_file, is_training, version_2_with_negative):
    """Read a SQuAD json file into a list of SquadExample."""
    with open(input_file, "r", encoding='utf-8') as reader:
        input_data = json.load(reader)["data"]

    def is_whitespace(c):
        if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F:
            return True
        return False

    def is_english_or_number(c):
        # ASCII letters A-Z / a-z or digits 0-9 (the original check missed digits)
        return ('A' <= c <= 'Z') or ('a' <= c <= 'z') or ('0' <= c <= '9')

    examples = []
    for entry in input_data:
        for paragraph in entry["paragraphs"]:
            paragraph_text = paragraph["context"]
            # Character-level tokenization: every non-whitespace character in
            # the context becomes its own token; char_to_word_offset maps each
            # kept character to the index of its token.
            doc_tokens = []
            char_to_word_offset = []
            for c in paragraph_text:
                if is_whitespace(c):
                    continue
                doc_tokens.append(c)
                char_to_word_offset.append(len(doc_tokens) - 1)

            for qa in paragraph["qas"]:
                qas_id = qa["id"]
                question_text = qa["question"]
                start_position = None
                end_position = None
                orig_answer_text = None
                is_impossible = False
                if is_training:
                    if (len(qa["answers"]) != 1) and (not is_impossible):
                        raise ValueError(
                            "For training, each question should have exactly 1 answer."
                        )
                    answer = qa["answers"][0]
                    orig_answer_text = answer["text"]
                    answer_offset = answer["answer_start"]
                    answer_length = len(orig_answer_text)
                    if answer_offset > len(char_to_word_offset) - 1:
                        logger.warning("样本错误: '%s'  offfset vs. length'%s'",
                                       answer_offset, len(char_to_word_offset))
                        continue
                    start_position = char_to_word_offset[answer_offset]
                    end_position = answer_offset + answer_length - 1
                    if end_position > len(char_to_word_offset) - 1:
                        logger.warning("样本错误: '%s' vs. '%s'", end_position, len(char_to_word_offset))
                        continue
                    end_position = char_to_word_offset[answer_offset +
                                                       answer_length - 1]
                    # Only add answers where the text can be exactly recovered from the
                    # document. If this CAN'T happen it's likely due to weird Unicode
                    # stuff so we will just skip the example.
                    #
                    # Note that this means for training mode, every example is NOT
                    # guaranteed to be preserved.
                    actual_text = "".join(
                        doc_tokens[start_position:(end_position + 1)])
                    cleaned_answer_text = "".join(
                        whitespace_tokenize(orig_answer_text))
                    if actual_text.find(cleaned_answer_text) == -1:
                        logger.warning("样本错误: '%s' vs. '%s'", actual_text,
                                       cleaned_answer_text)
                        continue

                example = SquadExample(
                    qas_id=qas_id,
                    question_text=question_text,
                    doc_tokens=doc_tokens,
                    orig_answer_text=orig_answer_text,
                    start_position=start_position,
                    end_position=end_position,
                    is_impossible=is_impossible)
                examples.append(example)
    return examples
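# --- Hedged sketch (not part of the original source) ------------------------
# read_squad_examples above tokenizes the context one character at a time
# (suited to Chinese SQuAD-style data).  The illustrative helper below mirrors
# that loop so the offset mapping is easy to sanity-check.


def _char_tokenize(paragraph_text):
    """Mirror the character-level loop in read_squad_examples."""
    doc_tokens, char_to_word_offset = [], []
    for c in paragraph_text:
        if c in (" ", "\t", "\r", "\n") or ord(c) == 0x202F:
            continue
        doc_tokens.append(c)
        char_to_word_offset.append(len(doc_tokens) - 1)
    return doc_tokens, char_to_word_offset

# e.g. _char_tokenize("北京大学") -> (["北", "京", "大", "学"], [0, 1, 2, 3]),
# so an answer starting at character 2 with length 2 maps to start_position=2
# and end_position=3.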
Beispiel #55
0
import io
from os.path import dirname, join


def read(*names, **kwargs):
    return io.open(
        join(dirname(__file__), *names), encoding=kwargs.get("encoding", "utf8")
    ).read()
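# Typical usage in a setup.py (illustrative, not from the original snippet):
#   long_description = read("README.rst")
#   changelog = read("docs", "CHANGELOG.rst")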
Beispiel #56
0
import io
import os
import re

from setuptools import setup

# Get the version from huggingmolecules/__init__.py
# Adapted from https://stackoverflow.com/a/39671214
this_directory = os.path.dirname(os.path.realpath(__file__))
init_path = os.path.join(this_directory, 'huggingmolecules', '__init__.py')
version_matches = re.search(
    r'__version__\s*=\s*[\'"]([^\'"]*)[\'"]',
    io.open(init_path, encoding='utf_8_sig').read(),
)
if version_matches is None:
    raise Exception('Could not determine huggingmolecules version from __init__.py')
__version__ = version_matches.group(1)

setup(
    name='huggingmolecules',
    version=__version__,
    packages=['huggingmolecules'],
    install_requires=[
        #'torch==1.7.0',
        'scikit-learn>=0.23.2',
        'filelock>=3.0.12',
        'gdown>=3.12.2'
    ]
)
Beispiel #57
0
    # Compile a db of {for value => dict terms that use that for value}
    fors = defaultdict(set)
    for key, anchors_ in anchors.items():
        for anchor in anchors_:
            for for_ in anchor["for"]:
                if for_ == "":
                    continue
                fors[for_].add(key)
            if not anchor["for"]:
                fors["/"].add(key)
    for key, val in fors.items():
        fors[key] = list(val)

    if not config.dryRun:
        try:
            with io.open(config.scriptPath+"/spec-data/specs.json", 'w', encoding="utf-8") as f:
                f.write(unicode(json.dumps(specs, ensure_ascii=False, indent=2, sort_keys=True)))
        except Exception, e:
            die("Couldn't save spec database to disk.\n{0}", e)
            return
        try:
            with io.open(config.scriptPath+"/spec-data/headings.json", 'w', encoding="utf-8") as f:
                f.write(unicode(json.dumps(headings, ensure_ascii=False, indent=2, sort_keys=True)))
        except Exception, e:
            die("Couldn't save headings database to disk.\n{0}", e)
            return
        try:
            with io.open(config.scriptPath+"/spec-data/anchors.data", 'w', encoding="utf-8") as f:
                writeAnchorsFile(f, anchors)
        except Exception, e:
            die("Couldn't save anchor database to disk.\n{0}", e)
Beispiel #58
0
            (object, InfoExtractor, SearchInfoExtractor))
        stop = False
        for b in bases:
            if b not in classes and b not in ordered_cls:
                if b.__name__ == 'GenericIE':
                    exit()
                classes.insert(0, b)
                stop = True
        if stop:
            break
        if all(b in ordered_cls for b in bases):
            ordered_cls.append(c)
            classes.remove(c)
            break
ordered_cls.append(_ALL_CLASSES[-1])

names = []
for ie in ordered_cls:
    name = ie.__name__
    src = build_lazy_ie(ie, name)
    module_contents.append(src)
    if ie in _ALL_CLASSES:
        names.append(name)

module_contents.append('_ALL_CLASSES = [{0}]'.format(', '.join(names)))

module_src = '\n'.join(module_contents) + '\n'

with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
    f.write(module_src)
Beispiel #59
0
from io import open
from os import path
from setuptools import setup, find_packages

# set rootdir to the repository root directory
rootdir = path.abspath(path.dirname(__file__))

# read readme.md to long_description
with open(path.join(rootdir, "README.md"), encoding="utf-8") as readme:
    long_description = readme.read()

setup(
    # Required: project name
    name="torecsys",
    # Required: tag
    version="0.0.5.dev1",
    # Optional: short description
    description="Pure PyTorch Recommender System Module",
    # Optional: long description
    long_description=long_description,
    # Optional: long description type
    long_description_content_type="text/markdown",
    # Optional: project url
    url="https://github.com/p768lwy3/torecsys",
    # Optional: author
    author="Jasper Li",
    # Optional: author email
    author_email="*****@*****.**",
    # Classifier
    classifiers=[
        "Development Status :: 1 - Planning",
Beispiel #60
0
import io
import os

from google.cloud import vision
from google.cloud.vision import types

print(vision)

client = vision.ImageAnnotatorClient()
file_name = os.path.abspath('face.jpg')

with io.open(file_name, 'rb') as image_file:
    content = image_file.read()

image = types.Image(content=content)

# response = client.label_detection(image=image)
response = client.face_detection(image=image)
# face_detection populates face_annotations; label_annotations is only filled
# by label_detection, so iterate over the detected faces instead.
faces = response.face_annotations

print(response)
print("---------------------------------------------------")

print('Faces:')
for face in faces:
    print(face)