Example No. 1
if os.path.exists(cache_file) and os.path.getmtime(
        cache_file) > os.path.getmtime(os.path.join(_curpath, "dict.txt")):
    print >> sys.stderr, "loading model from cache"
    try:
        trie, FREQ, total, min_freq = marshal.load(open(cache_file, 'rb'))
        load_from_cache_fail = False
    except:
        load_from_cache_fail = True

if load_from_cache_fail:
    trie, FREQ, total = gen_trie(os.path.join(_curpath, "dict.txt"))
    FREQ = dict([(k, float(v) / total)
                 for k, v in FREQ.iteritems()])  #normalize
    min_freq = min(FREQ.itervalues())
    print >> sys.stderr, "dumping model to file cache"
    marshal.dump((trie, FREQ, total, min_freq), open(cache_file, 'wb'))

print >> sys.stderr, "loading model cost ", time.time() - t1, "seconds."
print >> sys.stderr, "Trie has been built succesfully."


def __cut_all(sentence):
    N = len(sentence)
    i, j = 0, 0
    p = trie
    while i < N:
        c = sentence[j]
        if c in p:
            p = p[c]
            if '' in p:
                yield sentence[i:j + 1]
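
Examples 1, 5 and 21 below all follow the same pattern: reuse the marshal cache when it is newer than the source dictionary, otherwise rebuild and re-dump it. A minimal Python 3 sketch of that pattern, where build_model() is a hypothetical stand-in for gen_trie/gen_pfdict:

import marshal
import os

def load_or_build(cache_file, source_file):
    if os.path.exists(cache_file) and \
            os.path.getmtime(cache_file) > os.path.getmtime(source_file):
        try:
            with open(cache_file, 'rb') as f:
                return marshal.load(f)
        except (EOFError, ValueError, TypeError):
            pass  # stale or corrupt cache: fall through and rebuild
    model = build_model(source_file)  # hypothetical builder
    with open(cache_file, 'wb') as f:
        marshal.dump(model, f)
    return model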
Example No. 2
# The marshal module contains functions that read and write Python values
# in a binary format.

import marshal

# Serialization: marshal.dump writes the serialized bytes to an open file
# and returns None, so the file must be opened in binary mode.
file1 = open("file1.txt", "wb")
marshal.dump([1, 2, 3, 4, 5], file1)
file1.close()

# De-serialization: marshal.load reads one value back from the file.
file2 = open("file1.txt", "rb")
data = marshal.load(file2)
print(data)  # [1, 2, 3, 4, 5]
file2.close()
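
For completeness, marshal also has in-memory counterparts, marshal.dumps and marshal.loads, which work on bytes instead of file objects:

import marshal

payload = marshal.dumps({"a": 1, "b": [2, 3]})  # returns bytes
print(marshal.loads(payload))                   # {'a': 1, 'b': [2, 3]}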
Example No. 3
            print(x)
            if x not in cur:
                cur[x] = {}
                print(tree)
            print(cur[x])
            cur = cur[x]
            print(cur)
            print(cur.keys())
    print(tree)
    if 'g' not in tree:
        print('1')
    if 'b' not in tree:
        print('2')

    return tree


if __name__ == '__main__':
    import argparse
    import pickle
    parser = argparse.ArgumentParser(
        description='Build search tree from dictionary.')
    parser.add_argument('input_file', help='Input file name.')
    parser.add_argument('output_file', help='Output file name.')

    args = parser.parse_args()

    tree = gen(args.input_file)

    with open(args.output_file, 'wb') as f:
        pickle.dump(tree, f)
Example No. 4
 def dump_stats(self, file):
     import marshal
     f = open(file, 'wb')
     self.create_stats()
     marshal.dump(self.stats, f)
     f.close()
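
This looks like the profiler's dump_stats method; a stats file written this way can be read back with the standard pstats module. A usage sketch (the profiled expression and file name are arbitrary):

import cProfile
import pstats

cProfile.run("sum(range(10**6))", "profile.out")  # calls dump_stats internally
stats = pstats.Stats("profile.out")
stats.sort_stats("cumulative").print_stats(5)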
Example No. 5
    def initialize(self, dictionary=None):
        # Main entry point for system initialization
        if dictionary:
            abs_path = _get_abs_path(dictionary)
            if self.dictionary == abs_path and self.initialized:
                return
            else:
                self.dictionary = abs_path
                self.initialized = False
        else:
            abs_path = self.dictionary

        with self.lock:
            try:
                with DICT_WRITING[abs_path]:
                    pass
            except KeyError:
                pass
            if self.initialized:
                return

            default_logger.debug("Building prefix dict from %s ..." % (abs_path or 'the default dictionary'))
            t1 = time.time()
            if self.cache_file:
                cache_file = self.cache_file
            # default dictionary
            elif abs_path == DEFAULT_DICT:
                cache_file = "jieba.cache"
            # custom dictionary
            else:
                cache_file = "jieba.u%s.cache" % md5(
                    abs_path.encode('utf-8', 'replace')).hexdigest()
            cache_file = os.path.join(
                self.tmp_dir or tempfile.gettempdir(), cache_file)
            # prevent absolute path in self.cache_file
            tmpdir = os.path.dirname(cache_file)

            load_from_cache_fail = True
            if os.path.isfile(cache_file) and (abs_path == DEFAULT_DICT or
                os.path.getmtime(cache_file) > os.path.getmtime(abs_path)):
                default_logger.debug(
                    "Loading model from cache %s" % cache_file)
                try:
                    with open(cache_file, 'rb') as cf:
                        self.FREQ, self.total = marshal.load(cf)
                    load_from_cache_fail = False
                except Exception:
                    load_from_cache_fail = True

            if load_from_cache_fail:
                wlock = DICT_WRITING.get(abs_path, threading.RLock())
                DICT_WRITING[abs_path] = wlock
                with wlock:
                    self.FREQ, self.total = self.gen_pfdict(self.get_dict_file())
                    default_logger.debug(
                        "Dumping model to file cache %s" % cache_file)
                    try:
                        # prevent moving across different filesystems
                        fd, fpath = tempfile.mkstemp(dir=tmpdir)
                        with os.fdopen(fd, 'wb') as temp_cache_file:
                            marshal.dump(
                                (self.FREQ, self.total), temp_cache_file)
                        _replace_file(fpath, cache_file)
                    except Exception:
                        default_logger.exception("Dump cache file failed.")

                try:
                    del DICT_WRITING[abs_path]
                except KeyError:
                    pass

            self.initialized = True
            default_logger.debug(
                "Loading model cost %.3f seconds." % (time.time() - t1))
            default_logger.debug("Prefix dict has been built succesfully.")
Example No. 6
 def dump_code_to_file(code, file):
     file.write(MAGIC)
     wr_long(file, long(time.time()))  # noqa
     marshal.dump(code, file)
     file.flush()
Example No. 7
def update_pyc(filename, new_path):
    """Updates the filenames stored in pyc files."""
    with open(filename, 'rb') as f:
        magic = f.read(MAGIC_LENGTH)
        try:
            code = marshal.load(f)
        except Exception:
            print('Error in %s' % filename)
            raise

    def _make_code(code, filename, consts):
        if sys.version_info[0] == 2:  # pragma: no cover (PY2)
            arglist = [
                code.co_argcount,
                code.co_nlocals,
                code.co_stacksize,
                code.co_flags,
                code.co_code,
                tuple(consts),
                code.co_names,
                code.co_varnames,
                filename,
                code.co_name,
                code.co_firstlineno,
                code.co_lnotab,
                code.co_freevars,
                code.co_cellvars,
            ]
        else:  # pragma: no cover (PY3)
            arglist = [
                code.co_argcount,
                code.co_kwonlyargcount,
                code.co_nlocals,
                code.co_stacksize,
                code.co_flags,
                code.co_code,
                tuple(consts),
                code.co_names,
                code.co_varnames,
                filename,
                code.co_name,
                code.co_firstlineno,
                code.co_lnotab,
                code.co_freevars,
                code.co_cellvars,
            ]
        return CodeType(*arglist)

    def _process(code):
        consts = []
        for const in code.co_consts:
            if type(const) is CodeType:
                const = _process(const)
            consts.append(const)
        if new_path != code.co_filename or consts != list(code.co_consts):
            code = _make_code(code, new_path, consts)
        return code

    new_code = _process(code)

    if new_code is not code:
        debug('B %s' % filename)
        with open(filename, 'wb') as f:
            f.write(magic)
            marshal.dump(new_code, f)
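
On Python 3.8+ the hand-built argument list is unnecessary: code objects have a replace() method, so _make_code could be reduced to the following sketch (not the original code):

def _make_code(code, filename, consts):
    # CodeType.replace (Python 3.8+) copies a code object with the given
    # fields overridden.
    return code.replace(co_filename=filename, co_consts=tuple(consts))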
Example No. 8
#wd = webdriver.Chrome()
#base = 'https://iris.ucl.ac.uk/iris/search'
#wd.get(base)

#select = Select(wd.find_element_by_id('meta_x_phrase_sand'))
#select.select_by_visible_text('Researchers')
#wd.find_element_by_id('btnMainAdvSearch').click()
#sleep(30);

base = 'https://iris.ucl.ac.uk/iris/search/funnelbackResults?query=&collection=iris-meta&form=results&meta_x_phrase_sand=iris-researchers&f.Result+Categories%7cx=%2ciris+researchers&start_rank={}&query_and=&query_phrase=&query_not=&meta_t=&meta_O=&meta_d1day=&meta_d1month=&meta_d1year=&meta_d2day=&meta_d2month=&meta_d2year=&meta_N=&meta_S=&sort='
b = 'https://iris.ucl.ac.uk/iris/browse/profile?'
#

all = []
url_list = []
'''
for i in range(9585):
    if i % 10 == 1:
        all.append(base.format(i))

#marshal.dump(all, 'all')

#[:2]
for u in all:
    rsp = requests.get(u, verify=False)
    soup = BeautifulSoup(rsp.text, 'html.parser')
    for link in soup.find_all('a'):
        if link.get('href').find('upi=') != -1:
            if link.get('href') not in url_list:
                url_list.append(link.get('href').split('?')[1])
url_file = open("url_list", 'wb')
Example No. 9
import marshal, random

def ecriture_obstacles(chemin, x, y, obstaclage):
    # "Write obstacles": dump a random list of wall coordinates to `chemin`;
    # `obstaclage` is the fraction of the x*y grid to fill.
    obstacles = [(random.randint(0, x - 1), random.randint(0, y - 1))
                 for mur in range(int(x * y * obstaclage))]
    fichier = open(chemin, "wb")
    marshal.dump(obstacles, fichier)
    fichier.close()
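
Reading the obstacle list back is symmetric; a usage sketch with hypothetical arguments:

ecriture_obstacles("labyrinthe.bin", 20, 15, 0.25)  # hypothetical file and sizes
with open("labyrinthe.bin", "rb") as fichier:
    obstacles = marshal.load(fichier)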
Example No. 10
            for ip in product(*zip(py_array,py_short)):
                initial_py = "".join(ip)
                py_freq[initial_py] = py_freq.get(initial_py,0) + max(int(log(freq)),1)
                if not initial_py in p2c:
                    p2c[initial_py] = []
                p2c[initial_py].append((freq,word))

    lines = None

    p2c      = dict( ( k,tuple( w[1] for w in sorted(v,reverse=True) ) ) for k,v in p2c.iteritems())
    total    = sum(chn_freq.itervalues())
    chn_freq = dict( (k,log(float(v)/total)) for k,v in chn_freq.iteritems() )
    py_freq  = dict( (k,log(float(v)/total)) for k,v in py_freq.iteritems()  ) 
    min_freq = min(py_freq.values())
    with file(CACHE_PATH,'wb') as cache_file:
        marshal.dump((p2c,chn_freq,py_freq,min_freq),cache_file)

print "init cost: ", time.time() - t_start
#end init


single_letters = set(ascii_lowercase)

print "data loaded."

def word_rank(word):
    if word in single_letters:
        return min_freq-1.0
    elif len(word)==1:
        return 0.0
    return py_freq.get(word,min_freq*20.0)
Example No. 11
def dump(value, file):
    # Thin wrapper; `m` is presumably the marshal module imported under an alias.
    m.dump(value, file)
Example No. 12
 def save_kindex(self):
     '''Save the k-gram index to disk'''
     print(self.kindex)
     kgram_file = open(self.kindex_name, "wb")
     marshal.dump(self.kindex, kgram_file)
     kgram_file.close()
Example No. 13
 def dump(self, file="config.bin"):
     f = open(file, 'wb')  # marshal requires a binary-mode file
     marshal.dump(self.byKey, f)
     #l = lambda y,x, f = f, d = marshal.dump: d(x,f)
     #reduce(l,self.byId)
     f.close()
Example No. 14
def create_py3_base_library(libzip_filename, graph):
    """
    Package basic Python modules into .zip file. The .zip file with basic
    modules is necessary to have on PYTHONPATH for initializing libpython3
    in order to run the frozen executable with Python 3.
    """
    # Construct regular expression for matching modules that should be bundled
    # into base_library.zip.
    # Excluded are plain 'modules' or 'submodules.ANY_NAME'.
    # The match has to be exact - start and end of string not substring.
    regex_modules = '|'.join([r'(^%s$)' % x for x in PY3_BASE_MODULES])
    regex_submod = '|'.join([r'(^%s\..*$)' % x for x in PY3_BASE_MODULES])
    regex_str = regex_modules + '|' + regex_submod
    module_filter = re.compile(regex_str)

    try:
        # Remove .zip from previous run.
        if os.path.exists(libzip_filename):
            os.remove(libzip_filename)
        logger.debug('Adding python files to base_library.zip')
        # Class zipfile.PyZipFile is not suitable for PyInstaller needs.
        with zipfile.ZipFile(libzip_filename, mode='w') as zf:
            zf.debug = 3
            # Sort the graph nodes by identifier to ensure repeatable builds
            graph_nodes = list(graph.flatten())
            graph_nodes.sort(key=lambda item: item.identifier)
            for mod in graph_nodes:
                if type(mod) in (modulegraph.SourceModule,
                                 modulegraph.Package):
                    # Bundling just required modules.
                    if module_filter.match(mod.identifier):
                        st = os.stat(mod.filename)
                        timestamp = int(st.st_mtime)
                        size = st.st_size & 0xFFFFFFFF
                        # Name inside the archive. The ZIP format
                        # specification requires forward slashes as
                        # directory separator.
                        # TODO use .pyo suffix if optimize flag is enabled.
                        if type(mod) is modulegraph.Package:
                            new_name = mod.identifier.replace('.', '/') \
                                + '/__init__.pyc'
                        else:
                            new_name = mod.identifier.replace('.', '/') \
                                + '.pyc'

                        # Write code to a file.
                        # This code is similar to py_compile.compile().
                        with io.BytesIO() as fc:
                            # Prepare all data in byte stream file-like object.
                            fc.write(BYTECODE_MAGIC)
                            if is_py37:
                                # Additional bitfield according to PEP 552
                                # 0b01 means hash based but don't check the hash
                                fc.write(struct.pack('<I', 0b01))
                                with open(mod.filename, 'rb') as fs:
                                    source_bytes = fs.read()
                                source_hash = importlib_source_hash(
                                    source_bytes)
                                fc.write(source_hash)
                            else:
                                fc.write(struct.pack('<II', timestamp, size))
                            marshal.dump(mod.code, fc)
                            # Use a ZipInfo to set timestamp for deterministic build
                            info = zipfile.ZipInfo(new_name)
                            zf.writestr(info, fc.getvalue())

    except Exception as e:
        logger.error('base_library.zip could not be created!')
        raise
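
To inspect what landed in the archive, an entry can be unmarshalled by hand. With the PEP 552 layout written by the 3.7+ branch above, the code object starts 16 bytes in (4-byte magic, 4-byte flags, 8-byte source hash). A sketch with a hypothetical entry name:

import marshal
import zipfile

with zipfile.ZipFile("base_library.zip") as zf:
    raw = zf.read("io.pyc")          # hypothetical entry name
    code = marshal.loads(raw[16:])   # skip magic + flags + source hash
    print(code.co_filename, len(code.co_consts))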
Example No. 15
def create_py3_base_library(libzip_filename, graph):
    """
    Package basic Python modules into .zip file. The .zip file with basic
    modules is necessary to have on PYTHONPATH for initializing libpython3
    in order to run the frozen executable with Python 3.
    """

    # TODO Replace this function with something better or something from standard Python library.
    # Helper functions.
    def _write_long(f, x):
        """
        Write a 32-bit int to a file in little-endian order.
        """
        f.write(
            bytes([
                x & 0xff, (x >> 8) & 0xff, (x >> 16) & 0xff, (x >> 24) & 0xff
            ]))

    # Construct regular expression for matching modules that should be bundled
    # into base_library.zip.
    # Excluded are plain 'modules' or 'submodules.ANY_NAME'.
    # The match has to be exact - start and end of string not substring.
    regex_modules = '|'.join([r'(^%s$)' % x for x in PY3_BASE_MODULES])
    regex_submod = '|'.join([r'(^%s\..*$)' % x for x in PY3_BASE_MODULES])
    regex_str = regex_modules + '|' + regex_submod
    module_filter = re.compile(regex_str)

    try:
        # Remove .zip from previous run.
        if os.path.exists(libzip_filename):
            os.remove(libzip_filename)
        logger.debug('Adding python files to base_library.zip')
        # Class zipfile.PyZipFile is not suitable for PyInstaller needs.
        with zipfile.ZipFile(libzip_filename, mode='w') as zf:
            zf.debug = 3
            for mod in graph.flatten():
                if type(mod) in (modulegraph.SourceModule,
                                 modulegraph.Package):
                    # Bundling just required modules.
                    if module_filter.match(mod.identifier):
                        st = os.stat(mod.filename)
                        timestamp = int(st.st_mtime)
                        size = st.st_size & 0xFFFFFFFF
                        # Name inside a zip archive.
                        # TODO use .pyo suffix if optimize flag is enabled.
                        if type(mod) is modulegraph.Package:
                            new_name = mod.identifier.replace(
                                '.', os.sep) + os.sep + '__init__' + '.pyc'
                        else:
                            new_name = mod.identifier.replace('.',
                                                              os.sep) + '.pyc'

                        # Write code to a file.
                        # This code is similar to py_compile.compile().
                        with io.BytesIO() as fc:
                            # Prepare all data in byte stream file-like object.
                            fc.write(BYTECODE_MAGIC)
                            _write_long(fc, timestamp)
                            _write_long(fc, size)
                            marshal.dump(mod.code, fc)
                            zf.writestr(new_name, fc.getvalue())

    except Exception as e:
        logger.error('base_library.zip could not be created!')
        raise
Example No. 16
def main(args=None):

    from os.path import exists, join, dirname, isdir

    if args is None:
        args = sys.argv[1:]
        logging.basicConfig()

    options, args = parser.parse_args(args)

    if options.lock_file:
        import zc.lockfile
        lock = zc.lockfile.LockFile(options.lock_file)
    else:
        lock = None

    fudge = options.clock_fudge_factor
    encoding = options.file_system_encoding
    had_index = False
    s3 = {}
    if options.index:
        if not options.ignore_index and exists(options.index):
            with open(options.index, 'rb') as f:
                s3 = marshal.load(f)
            had_index = True
        index = {}
    else:
        index = None

    src_path, bucket_name = args

    if '/' in bucket_name:
        bucket_name, bucket_prefix = bucket_name.split('/', 1)
    else:
        bucket_prefix = ''
    len_bucket_prefix = len(bucket_prefix)

    fs = {}
    queue = Queue.Queue(maxsize=999)
    put = queue.put

    cloudfront = options.cloudfront
    invalidations = []

    generate_index_html = options.generate_index_html
    GENERATE = object()
    INDEX_HTML = "index.html"

    def worker(base_path):
        mtime = path = 0
        while 1:
            try:
                mtime, queued_path = queue.get()

                path = queued_path
                if path is None:
                    return

                key = boto.s3.key.Key(bucket)

                if mtime is None:  # delete
                    try:
                        try:
                            key.key = bucket_prefix + path
                            key.delete()
                        except Exception:
                            logger.exception('deleting %r, retrying' % key.key)
                            time.sleep(9)
                            key.key = bucket_prefix + path
                            key.delete()
                    except Exception:
                        if index is not None:
                            # Failed to delete. Put the key back so we
                            # try again later
                            index[queued_path] = 1
                        raise

                elif mtime is GENERATE:
                    (path, s3mtime) = path
                    fspath = join(base_path, path.encode(encoding))
                    if exists(fspath):
                        # Someone created a file since we decided to
                        # generate one.
                        continue

                    fspath = dirname(fspath)
                    data = "Index of " + path[:-len(INDEX_HTML) - 1]
                    data = [
                        "<!-- generated -->",
                        "<html><head><title>%s</title></head><body>" % data,
                        "<h1>%s</h1><table>" % data,
                        "<tr><th>Name</th><th>Last modified</th><th>Size</th>"
                        "</tr>",
                    ]
                    for name in sorted(os.listdir(fspath)):
                        if name.startswith('.'):
                            continue  # don't index dot files
                        name_path = join(fspath, name)
                        if isdir(name_path):
                            name = name + '/'
                            size = '-'
                        else:
                            size = os.stat(name_path).st_size
                        mtime = time.ctime(os.stat(name_path).st_mtime)
                        name = name.decode(encoding)
                        data.append('<tr><td><a href="%s">%s</a></td>\n'
                                    '    <td>%s</td><td>%s</td></tr>' %
                                    (name, name, mtime, size))
                    data.append("</table></body></html>\n")
                    data = '\n'.join(data)

                    digest = hashlib.md5(data.encode(encoding)).hexdigest()
                    if digest != s3mtime:
                        # Note that s3mtime is either a previous
                        # digest, or 0 (because the path wasn't in s3),
                        # or an s3 upload time.  The test above
                        # works in all of these cases.
                        key.key = bucket_prefix + path
                        key.set_metadata('generated', 'true')
                        try:
                            key.set_contents_from_string(
                                data,
                                headers={'Content-Type': 'text/html'},
                            )
                        except Exception:
                            logger.exception(
                                'uploading generated %r, retrying' % path)
                            time.sleep(9)
                            key.set_contents_from_string(
                                data,
                                headers={'Content-Type': 'text/html'},
                            )

                        if s3mtime:
                            # update (if it was add, mtime would be 0)
                            if cloudfront:
                                invalidations.append(path)

                    if index is not None:
                        index[path] = digest

                else:  # upload
                    try:
                        if had_index:
                            # We only store mtimes to the nearest second.
                            # We don't have a fudge factor, so there's a
                            # chance that someone might update the file in
                            # the same second, so we check if a second has
                            # passed and sleep if it hasn't.
                            now = time_time_from_sixtuple(
                                time.gmtime(time.time()))
                            if not now > mtime:
                                time.sleep(1)

                        key.key = bucket_prefix + path
                        path = join(base_path, path)
                        try:
                            key.set_contents_from_filename(
                                path.encode(encoding))
                        except Exception:
                            logger.exception('uploading %r %r, retrying' %
                                             (mtime, path))
                            time.sleep(9)
                            key.set_contents_from_filename(
                                path.encode(encoding))

                    except Exception:
                        if index is not None:
                            # Upload failed. Remove from index so we
                            # try again later (if the path is still
                            # around).
                            index.pop(queued_path)
                        raise

            except Exception:
                logger.exception('processing %r %r' % (mtime, path))
            finally:
                queue.task_done()

    workers = [thread(worker, src_path) for i in range(options.worker_threads)]

    # As we build up the 2 dicts, we try to identify cases we can
    # eliminate right away, or cases we can begin handling, so we can
    # avoid accumulating them, and also so we can start processing
    # sooner.

    def listfs(path, base):
        for name in sorted(os.listdir(path)):
            pname = join(path, name)
            rname = join(base, name)
            if isdir(pname):
                listfs(pname, rname)
                if generate_index_html and not exists(join(pname, INDEX_HTML)):
                    key = rname.decode(encoding) + '/' + INDEX_HTML
                    # We don't short circuit by checking s3 here.
                    # We'll do that at the end.
                    fs[key] = -1
            else:
                try:
                    mtime = time_time_from_sixtuple(
                        time.gmtime(os.stat(pname).st_mtime))
                except OSError:
                    logger.exception("bad file %r" % rname)
                    continue

                key = rname.decode(encoding)
                if index is not None:
                    index[key] = mtime
                if key in s3:
                    # We can go ahead and do the check
                    s3mtime = s3.pop(key)
                    if (isinstance(s3mtime, basestring)  # generated
                            or mtime > s3mtime):
                        put((mtime, key))
                        if cloudfront:
                            invalidations.append(key)
                else:
                    fs[key] = mtime

    fs_thread = thread(listfs, src_path, '')

    s3conn = boto.s3.connection.S3Connection()
    bucket = s3conn.get_bucket(bucket_name)

    if not had_index:

        @thread
        def s3_thread():
            for key in bucket.list(bucket_prefix):

                s3mtime = time_time_from_sixtuple(parse_time(
                    key.last_modified))

                # subtract a fudge factor to account for crappy clocks and bias
                # caused by the delta between the start of the upload and the
                # computation of last_modified.
                s3mtime -= fudge

                path = key.key[len_bucket_prefix:]

                if path in fs:
                    mtime = fs.pop(path)
                    if mtime > s3mtime:
                        put((mtime, path))
                        if cloudfront:
                            invalidations.append(path)
                    elif mtime == -1:
                        # generate marker. Put it back.
                        fs[path] = -1
                else:
                    s3[path] = s3mtime

        s3_thread.join()

    fs_thread.join()

    for (path, mtime) in fs.iteritems():
        s3mtime = s3.pop(path, 0)
        if mtime == -1:
            # We generate unconditionally, because the content
            # is dynamic.  We pass along the old s3mtime, which might
            # be an old digest to see if we actually have to update s3.
            put((GENERATE, (path, s3mtime)))
        else:
            if mtime > s3mtime:
                put((mtime, path))

                if s3mtime:
                    # update (if it was add, mtime would be 0)
                    if cloudfront:
                        invalidations.append(path)

    if not options.no_delete:
        for path in s3:
            put((None, path))
            if cloudfront:
                invalidations.append(path)

    queue.join()

    if index is not None:
        with open(options.index, 'wb') as f:
            marshal.dump(index, f)

    if cloudfront:
        cfconn = boto.connect_cloudfront()
        time.sleep(9)  # give a little extra time for the s3 updates to propagate
        for start in range(0, len(invalidations), 1000):
            cfconn.create_invalidation_request(
                cloudfront,
                [bucket_prefix + p for p in invalidations[start:start + 1000]])

    if lock is not None:
        lock.close()

    for _ in workers:
        put((None, None))
    for w in workers:
        w.join()
Example No. 17
from bx.misc.seekbzip2 import SeekableBzip2File


parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('bz2', help='bz2 dump file')
parser.add_argument('bz2t', help='bzip-table file')
parser.add_argument('output', help='destination file')
parser.add_argument('--offsets-only', action='store_true')
args = parser.parse_args()


index = {}
dump = SeekableBzip2File(args.bz2, args.bz2t)
offset = 0
try:
    for line in dump:
        if line == '  <page>\n':
            start = offset
        elif line.startswith('    <title>'):
            title = line[11:-9]
            index[title] = start
        offset = dump.tell()
finally:
    dump.close()

if args.offsets_only:
    index = tuple(index)

with open(args.output, 'wb') as f:
    marshal.dump(index, f)
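
Without --offsets-only, the resulting file maps page titles to byte offsets into the compressed dump. A lookup sketch, assuming the same SeekableBzip2File interface used above and hypothetical file names:

import marshal
from bx.misc.seekbzip2 import SeekableBzip2File

with open("titles.idx", "rb") as f:                        # hypothetical name
    index = marshal.load(f)

dump = SeekableBzip2File("dump.xml.bz2", "dump.xml.bz2t")  # hypothetical names
dump.seek(index["Python (programming language)"])
print(dump.readline())  # should be '  <page>\n'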
Example No. 18
        codestring = codestring + '\n'
    try:
        codeobject = ompc.compile(codestring, dfile or file,'exec')
    except Exception, err:
        py_exc = MCompileError(err.__class__,err.args,dfile or file)
        if doraise:
            raise py_exc
        else:
            sys.stderr.write(py_exc.msg + '\n')
            return
    if cfile is None:
        cfile = file + (__debug__ and 'c' or 'o')
    fc = open(cfile, 'wb')
    fc.write('\0\0\0\0')
    wr_long(fc, timestamp)
    marshal.dump(codeobject, fc)
    fc.flush()
    fc.seek(0, 0)
    fc.write(MAGIC)
    fc.close()
    set_creator_type(cfile)

def main(args=None):
    """Compile several source files.

    The files named in 'args' (or on the command line, if 'args' is
    not specified) are compiled and the resulting bytecode is cached
    in the normal manner.  This function does not search a directory
    structure to locate source files; it only compiles files named
    explicitly.
Example No. 19
def writePyc(code, path):
    f = open(path, "wb")
    f.write(MAGIC)
    f.write("\0" * 4)  # don't bother about a time stamp
    marshal.dump(code, f)
    f.close()
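
writePyc above is Python 2 era (an 8-byte header written as str). A rough Python 3.7+ equivalent, where the header is magic, flags, timestamp and size, four bytes each per PEP 552:

import importlib.util
import marshal
import struct

def write_pyc(code, path):
    with open(path, "wb") as f:
        f.write(importlib.util.MAGIC_NUMBER)   # 4-byte magic for this Python
        f.write(struct.pack("<III", 0, 0, 0))  # flags, timestamp, size
        marshal.dump(code, f)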
Example No. 20
 def save(self):
     if self.usecache and self.cache:
         self.canonicalize_filenames()
         import marshal
         with open(self.cache, 'wb') as cache:
             marshal.dump(self.cexecuted, cache)
Example No. 21
def initialize(*args):
    global trie, FREQ, total, min_freq, initialized
    if len(args) == 0:
        dictionary = DICTIONARY
    else:
        dictionary = args[0]
    with DICT_LOCK:
        if initialized:
            return
        if trie:
            del trie
            trie = None
        _curpath = os.path.normpath(
            os.path.join(os.getcwd(), os.path.dirname(__file__)))

        abs_path = os.path.join(_curpath, dictionary)
        logger.debug("Building Trie..., from %s" % abs_path)
        t1 = time.time()
        if abs_path == os.path.join(_curpath, "dict.txt"):  # default dictionary
            cache_file = os.path.join(tempfile.gettempdir(), "jieba.cache")
        else:  # custom dictionary
            cache_file = os.path.join(
                tempfile.gettempdir(),
                "jieba.user." + str(hash(abs_path)) + ".cache")

        load_from_cache_fail = True
        if os.path.exists(cache_file) and os.path.getmtime(
                cache_file) > os.path.getmtime(abs_path):
            logger.debug("loading model from cache %s" % cache_file)
            try:
                trie, FREQ, total, min_freq = marshal.load(
                    open(cache_file, 'rb'))
                load_from_cache_fail = False
            except:
                load_from_cache_fail = True

        if load_from_cache_fail:
            trie, FREQ, total = gen_trie(abs_path)
            FREQ = dict([(k, log(float(v) / total))
                         for k, v in FREQ.iteritems()])  #normalize
            min_freq = min(FREQ.itervalues())
            logger.debug("dumping model to file cache %s" % cache_file)
            try:
                tmp_suffix = "." + str(random.random())
                with open(cache_file + tmp_suffix, 'wb') as temp_cache_file:
                    marshal.dump((trie, FREQ, total, min_freq),
                                 temp_cache_file)
                if os.name == 'nt':
                    import shutil
                    replace_file = shutil.move
                else:
                    replace_file = os.rename
                replace_file(cache_file + tmp_suffix, cache_file)
            except:
                logger.error("dump cache file failed.")
                logger.exception("")

        initialized = True

        logger.debug("loading model cost %s seconds." % (time.time() - t1))
        logger.debug("Trie has been built succesfully.")
Example No. 22
    def build_archive(self, libpath, delete_existing_resources=False):
        """Build the archive containing the Python library.
        """
        if self.options.bundle_files <= 1:
            # Add pythonXY.dll as resource into the library file
            #
            # XXX We should add a flag to the exe so that it does not try to load pythonXY.dll
            # from the file system.
            # XXX XXX XXX
            with UpdateResources(
                    libpath,
                    delete_existing=delete_existing_resources) as resource:
                with open(pydll, "rb") as ifi:
                    pydll_bytes = ifi.read()
                # We do not need to replace the winver string resource
                # in the python dll since it will be loaded via
                # MemoryLoadLibrary, and so python cannot find the
                # string resources anyway.
                if self.options.verbose > 1:
                    print("Add resource %s/%s(%d bytes) to %s" %
                          (os.path.basename(pydll), 1, len(pydll_bytes),
                           libpath))
                resource.add(type=os.path.basename(pydll),
                             name=1,
                             value=pydll_bytes)

        if self.options.optimize:
            bytecode_suffix = OPTIMIZED_BYTECODE_SUFFIXES[0]
        else:
            bytecode_suffix = DEBUG_BYTECODE_SUFFIXES[0]

        if self.options.compress:
            compression = zipfile.ZIP_DEFLATED
        else:
            compression = zipfile.ZIP_STORED

        # Create a zipfile and append it to the library file
        arc = zipfile.ZipFile(libpath, "a", compression=compression)

        # The same modules may be in self.ms.modules under different
        # keys; we only need one of them in the archive.
        for mod in set(self.mf.modules.values()):
            if mod.__code__:
                path = mod.__dest_file__
                stream = io.BytesIO()
                stream.write(imp.get_magic())
                if sys.version_info >= (3, 7, 0):
                    stream.write(b"\0\0\0\0")  # null flags
                stream.write(b"\0\0\0\0")  # null timestamp
                stream.write(b"\0\0\0\0")  # null size
                marshal.dump(mod.__code__, stream)
                arc.writestr(path, stream.getvalue())

            elif hasattr(mod, "__file__"):
                assert mod.__file__.endswith(EXTENSION_TARGET_SUFFIX)
                if self.options.bundle_files <= 2:
                    # put .pyds into the archive
                    arcfnm = mod.__name__.replace(
                        ".", "\\") + EXTENSION_TARGET_SUFFIX
                    if self.options.verbose > 1:
                        print("Add %s to %s" %
                              (os.path.basename(mod.__file__), libpath))
                    arc.write(mod.__file__, arcfnm)
                else:
                    # The extension modules will be copied into
                    # dlldir.  To be able to import it without dlldir
                    # being on sys.path, create a loader module and
                    # put that into the archive.
                    pydfile = mod.__name__ + EXTENSION_TARGET_SUFFIX
                    if self.options.verbose > 1:
                        print("Add Loader for %s to %s" %
                              (os.path.basename(mod.__file__), libpath))
                    loader = LOAD_FROM_DIR.format(pydfile)

                    code = compile(loader,
                                   "<loader>",
                                   "exec",
                                   optimize=self.options.optimize)
                    if hasattr(mod, "__path__"):
                        path = mod.__name__.replace(
                            ".", "\\") + "\\__init__" + bytecode_suffix
                    else:
                        path = mod.__name__.replace(".",
                                                    "\\") + bytecode_suffix
                    stream = io.BytesIO()
                    stream.write(imp.get_magic())
                    if sys.version_info >= (3, 7, 0):
                        stream.write(b"\0\0\0\0")  # null flags
                    stream.write(b"\0\0\0\0")  # null timestamp
                    stream.write(b"\0\0\0\0")  # null size
                    marshal.dump(code, stream)
                    arc.writestr(path, stream.getvalue())

        if self.options.bundle_files == 0:
            # put everything into the arc
            files = self.mf.all_dlls()
        elif self.options.bundle_files in (1, 2):
            # put only extension dlls into the arc
            files = self.mf.extension_dlls()
        else:
            arc.close()
            return

        for src in files:
            if self.options.verbose > 1:
                print("Add DLL %s to %s" % (os.path.basename(src), libpath))
            arc.write(src, os.path.basename(src))

        arc.close()
Example No. 23
 def marshal_dump(code, f):
     # CPython's marshal.dump expects a real file object; for merely
     # file-like objects, serialize with marshal.dumps and write the
     # bytes instead.  (`file` is a Python 2 builtin, so this check
     # only works on Python 2.)
     if isinstance(f, file):
         marshal.dump(code, f)
     else:
         f.write(marshal.dumps(code))
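
A usage sketch under Python 2 (where the `file` builtin exists); an in-memory buffer is not a real file, so the dumps branch is taken:

import io
import marshal

buf = io.BytesIO()
code = compile("print('hi')", "<string>", "exec")
marshal_dump(code, buf)              # BytesIO is not a real file: dumps branch
exec(marshal.loads(buf.getvalue()))  # prints: hi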
Example No. 24
import parser, marshal, os, __future__

DUMPFILE = 'this_is_the_marshal_file'


def reallycompile(tuples_or_src, filename, mode, flag_names):
    if type(tuples_or_src) is str:
        flags = 0
        if 'nested_scopes' in flag_names:
            flags |= __future__.CO_NESTED
        if 'generators' in flag_names:
            flags |= __future__.CO_GENERATOR_ALLOWED
        if 'division' in flag_names:
            flags |= __future__.CO_FUTURE_DIVISION
        return compile(tuples_or_src, filename, mode, flags)
    return parser.compileast(parser.tuple2ast(tuples_or_src), filename)


if __name__ == '__main__':
    s = file(DUMPFILE, "rb").read()
    tup = marshal.loads(s)
    tuples_or_src, filename, mode, done, flag_names = tup
    try:
        code = reallycompile(tuples_or_src, filename, mode, flag_names)
    except SyntaxError, e:
        code = e.msg, (e.filename, e.lineno, e.offset, e.text)
    done = True
    tup = (code, filename, mode, done, flag_names)
    marshal.dump(tup, file(DUMPFILE, "wb"))
Example No. 25
def dump(path, data):
    """Serialize the given data and write it to a file at the given path."""
    import marshal
    file = open(path, 'wb')  # marshal data must be written in binary mode
    marshal.dump(data, file)
    file.close()
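
A matching loader, assuming the same layout:

def load(path):
    """Read back a file written by dump() and return the value."""
    import marshal
    with open(path, 'rb') as file:
        return marshal.load(file)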
Example No. 26
  elements = set([g for g in generators])

  for generation_index in xrange(generation_limit):
    generation_start_count = len(elements)
    for generator in generators:
      new_elements = [element * generator for element in elements]
      new_elements.extend([generator * element for element in elements])
      for el in new_elements:
        elements.add(el)
    print "Generation #" + repr(generation_index), ": ", len(elements)
    generation_end_count = len(elements)
    if generation_end_count == generation_start_count:
      break
  return elements

generators = [State(g) for g in get_generators()]

elements = generate_loop(generators)

print "Element count:", len(elements)


element_values = [e.double for e in elements]

file_name = '../data/states_' + str(len(elements))+ '.txt'

with open(file_name, 'wb') as f:  # marshal needs binary mode
  marshal.dump(element_values, f)
  print "Saved states to " + file_name
Example No. 27
 def dump_stats(self, file):
     import marshal
     with open(file, 'wb') as f:
         self.create_stats()
         marshal.dump(self.stats, f)
Example No. 28
 def handle_item(path, item):
     # Append one (path, item) record to stdout; on Python 3 this needs a
     # binary stream such as sys.stdout.buffer.
     marshal.dump((path, item), stdout)
     return True
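
Records appended this way can be streamed back until the input is exhausted, since marshal.load raises EOFError at end of file. A reader sketch:

import marshal

def iter_records(stream):
    # Yield successive marshal records from a binary stream until EOF.
    while True:
        try:
            yield marshal.load(stream)
        except EOFError:
            return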
Example No. 29
 def save_toc(self, tocpos):
     """Default - toc is a dict
        Gets marshaled to self.lib
     """
     marshal.dump(self.toc, self.lib)
Example No. 30
 def export_item(self, item):
     marshal.dump(dict(self._get_serialized_fields(item)), self.file)