Exemple #1
0
 def get(self, searchKey, exhaustive=False):
     """Look up searchKey across the mapped cdb files.

     exhaustive must be True when the mapping keys are not sorted in
     ascending order; it probes every cdb instead of selecting one.
     Returns the stored value(s) or None.
     """
     encoded = searchKey.encode('utf-8')
     if exhaustive:
         # Probe every cdb in order; the first non-empty hit wins.
         for entry in self.mapping:
             targetCDB = cdb.init(entry['cdb'])
             if self.repeated_keys:
                 value = targetCDB.getall(encoded)
             else:
                 value = targetCDB.get(encoded)
             if value:
                 return value
         return None
     # Keys are sorted ascending: advance while searchKey is not smaller
     # than the mapping key, keeping the last cdb whose range covers it.
     nowCDB = self.mapping[0]['cdb']
     for entry in self.mapping[1:]:
         nowKey = entry['key']
         if self.numerical_keys:
             if int(searchKey) < int(nowKey):
                 break
         elif encoded < nowKey:
             break
         nowCDB = entry['cdb']
     targetCDB = cdb.init(nowCDB)
     if self.repeated_keys:
         return targetCDB.getall(encoded)
     return targetCDB.get(encoded)
Exemple #2
0
 def __init__(self, key, reverse=False, cbc=False, basedir='.', debug=0):
     """Set up the keyed HMAC and open the word/group cdb mappings."""
     # hmac.HMAC with no digestmod argument defaults to MD5.
     self._hmac = hmac.HMAC(key)
     self.reverse = reverse
     self.cbc = cbc
     self.debug = debug
     # word -> group and group -> words constant databases under basedir.
     w2g_path = os.path.join(basedir, 'w2g.cdb')
     g2w_path = os.path.join(basedir, 'g2w.cdb')
     self.WORD2GROUP = cdb.init(w2g_path)
     self.GROUP2WORDS = cdb.init(g2w_path)
     self._a0 = None
     self._a1 = None
     return
Exemple #3
0
    def __init__(self, filename):
        """Open filename as a cdb, writing an empty database if none exists."""
        self.filename = filename
        self.tempfile = "%s.tmp" % filename

        # In-memory overlay for entries added before the next sync.
        self.db = {}
        try:
            self.cdb = cdb.init(self.filename)
        except cdb.error:
            # No usable cdb on disk yet: create an empty one, then open it.
            maker = cdb.cdbmake(self.filename, self.tempfile)
            maker.finish()
            del maker
            self.cdb = cdb.init(self.filename)
Exemple #4
0
def example_function(param):
    """
    Example function.

    Keyword arguments:
    param -- the return value

    """
    # Build a tiny page table and persist it as a cdb keyed by page name.
    pages = {'Genetics': {'id': 4}, 'Other': {'id': 5}}
    writer = MediaWikiCdbWriter()
    writer.writeCdbIdFromName("../cdb/pageIdFromName.cdb", pages)
    pageName = "Genetics"
    pageIdFromName = cdb.init("../cdb/pageIdFromName.cdb")
    p = pageIdFromName.get(pageName)
    # Page ids are stored packed as 32-bit little-endian integers.
    s = struct.Struct("<l")
    i = s.unpack(p)
    print "xx", i[0]

    d = CdbDictIdFromName("../cdb/pageIdFromName.cdb")
    print "yy", d['Genetics']

    mpp = MyPrettyPrinter()

    pageProjectsFromId = CdbDictPageProjectsFromId("../cdb/pageProjectsFromId.cdb")
    print "pageProjects"
    #print pageProjectsFromId
    #mpp.pprint(pageProjectsFromId)

    d = CdbDictNameFromId("../cdb/pageNameFromId.cdb")
    print "CdbDictNameFromId"
    print d
    print "keys"
    print d.keys()
    print "d[]"
    for i in d:
        print i, d[i]

    d = CdbDictIdFromName("../cdb/pageIdFromName.cdb")
    print "CdbDictIdFromName"
    print "keys"
    print d.keys()
    print d['Genetics']
    print "d[]"
    for i in d:
        print i, d[i]
    # NOTE(review): the bare return below makes everything after it
    # unreachable dead code, including the final `return param`.
    return
    print "d.keys()"
    for i in d.keys():
        print i, d[i]
    print "d.interkeys()"
    for k in d.iterkeys():
        print d[k]
    print "d.intervalues()"
    for v in d.itervalues():
        print v
    print "d.interitems()"
    for k, v in d.iteritems():
        print 'd[', k, '] = ', v

    return param
Exemple #5
0
 def __getitem__(self, key):
     """Return the value stored under key (None when absent).

     If the cached cdb handle is missing or has gone stale, reopen it
     from self.fn and retry once.
     """
     try:
         return self.db.get(key)
     except Exception:
         # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
         # are no longer swallowed; reopen the handle and retry.
         self.db = cdb.init(self.fn)
         return self.db.get(key)
Exemple #6
0
def get_sentence_by_sid(sid, sid2sent_dir):
    """Return the sentence stored for sid in the cdb tree under sid2sent_dir.

    The shard-directory layout depends on the corpus name; on any
    retrieval failure a message is written to stderr and None is
    returned.
    """
    sid = sid.split('%')[0]
    sid_components = sid.split('-')

    if os.path.basename(sid2sent_dir) == "v2006-2015.text-cdb":
        if 'data' in sid:
            # Drop empty components produced by consecutive dashes.
            sid_components = [x for x in sid_components if x != ""]
            sub_dirs = [sid_components[0], sid_components[1]] + list(
                sid_components[2][:3]) + [sid_components[2][:4]]
        else:
            sub_dirs = [sid_components[0]] + list(
                sid_components[1][:3]) + [sid_components[1][:4]]
    else:
        # e.g. "tsubame.results.orig-cdb" layout
        sub_dirs = [
            sid_components[0], sid_components[1][:4], sid_components[1][:6]
        ]
    sub_dir_str = "/".join(sub_dirs)

    try:
        sid2sent = "%s/%s.cdb" % (sid2sent_dir, sub_dir_str)
        SID2SENT = cdb.init(sid2sent.encode('utf-8'))
        sent = SID2SENT.get(sid)
        if sent is None:  # `is None`, not `== None`
            sys.stderr.write("Cannot retrieve sentence of sid:%s.\n" % sid)
        return sent
    except Exception:
        # Narrowed from a bare `except:`; return None explicitly.
        sys.stderr.write("Cannot retrieve sentence of sid:%s.\n" % sid)
        return None
 def __init__(self, filename):
     """Open (creating an empty file if absent) the cdb backing this dict."""
     dict.__init__(self)
     self.filename = filename
     # "<l" = 32-bit little-endian signed integer record format.
     self.struct = struct.Struct("<l")
     if not os.path.exists(filename):
         # Touch an empty file so cdb.init has something to open.
         with open(filename, "w"):
             pass
     self.cdb = cdb.init(filename)
Exemple #8
0
    def __init__(self, name, converter):
        """Open a cdb (or mcdb) file and install a value converter plus cache."""
        super(CachedDB, self).__init__()
        mcdb_name = name[:-4] + '.mcdb'
        if os.path.exists(name):
            print("CDB: opening", name)
            self.db = cdb.init(name)
            self.db_contains = lambda key: self.db.has_key(key)
        elif os.path.exists(mcdb_name):
            print("MCDB: opening", mcdb_name)
            self.db = mcdb.read(mcdb_name)
            # mcdb has no has_key(); probe with a sentinel default instead.
            self.db_contains = lambda key: self.db.get(key, _missing
                                                       ) is not _missing
        else:
            raise ValueError("Unknown file: %s" % (name, ))
        if converter in [int, float]:
            # Stored values are packed binary: 64-bit unsigned int or
            # 32-bit little-endian float.
            if converter == int:
                s = struct.Struct('<Q').unpack
            else:
                s = struct.Struct('<f').unpack

            def c(v, s=s):
                return s(v)[0]

            self.converter = c
        elif converter == 'blosc_to_list':
            self.converter = blosc_decompress_int_list
        # NOTE(review): any other converter value leaves self.converter
        # unset — confirm callers only pass int, float or 'blosc_to_list'.
        self.cache = {}
Exemple #9
0
    def __getitem__(self, key):
        """Return the value stored under key (None when absent).

        If the cached cdb handle is missing or stale, reopen it from
        self.fn and retry once.
        """
        try:
            return self.db.get(key)
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            self.db = cdb.init(self.fn)
            return self.db.get(key)
Exemple #10
0
    def sync(self, force=False):
        """Rewrite the cdb, merging on-disk records with pending self.db.

        Records whose key appears in self.db are replaced by the pending
        values; `force` is accepted for API compatibility but unused.
        """
        if not self.db:
            return

        tmp = cdb.cdbmake(self.filename, self.tempfile)

        # Copy original records, skipping keys that will be re-added below.
        # (Removed an unused `dk = decode(k)` local from the original.)
        r = self.cdb.each()
        while r:
            k, v = r
            if k not in self.db:
                tmp.add(*r)
            r = self.cdb.each()

        # Add the pending entries; each key maps to a list of values.
        for k, l in self.db.iteritems():
            for v in l:
                try:
                    tmp.add(k, v)
                except Exception:
                    # Log the offending pair, then propagate.
                    print(k, v)
                    raise

        tmp.finish()
        # Reopen the freshly written database and clear the overlay.
        self.cdb = cdb.init(self.filename)
        self.db = {}
Exemple #11
0
def search_cdbs(cdbs, key, save_null=False):
    """Look up key in each cdb file listed in cdbs.

    Returns the list of hit values; misses (None) are included only
    when save_null is True.
    """
    hits = []
    for cdb_path in cdbs:
        this_hits = cdb.init(cdb_path).get(key)
        # `is not None`, not `!= None` (identity test for the singleton).
        if this_hits is not None or save_null:
            hits.append(this_hits)
    return hits
Exemple #12
0
 def clear(self):
     """Remove all entries from the dictionary."""
     # Drop the old database file and replace it with an empty cdb.
     os.remove(self.filename)
     open(self.filename, "w").close()
     tmp_name = self.filename + ".tmp"
     maker = cdb.cdbmake(self.filename, tmp_name)
     maker.finish()
     del maker
     self.cdb = cdb.init(self.filename)
Exemple #13
0
 def iteritems(self):
     """Yield an Item for every (name, fs-id) record in the name db."""
     name_db = cdb.init(self._name_db)
     record = name_db.each()
     while record:
         name, fs_id = record
         item = Item(self, name.decode('utf-8'))
         item._fs_item_id = fs_id
         yield item
         record = name_db.each()
Exemple #14
0
    def _get_item_id(self, itemname):
        """
        Get ID of item (or None if no such item exists)

        @param itemname: name of item (unicode)
        """
        encoded_name = itemname.encode('utf-8')
        name_db = cdb.init(self._name_db)
        return name_db.get(encoded_name)
 def printCdbFromIdFile(self, filename):
     """Dump a cdb whose keys are packed 32-bit little-endian integer ids."""
     print "\nfile:" + filename
     c = cdb.init(filename)
     k = c.firstkey()
     while k is not None:
         v = c.get(k)
         # Unpack the binary key so it can be shown as hex.
         i = struct.unpack("<l", k)
         print hex(i[0]), "=>", v
         k = c.nextkey()
def cdb_read_proc(file_cdb):
    """Load every record of file_cdb into a dict, JSON-decoding the values."""
    result = {}
    handle = cdb.init(file_cdb)
    record = handle.each()
    while record:
        key, raw_value = record
        result[key] = json.loads(raw_value)
        record = handle.each()
    return result
Exemple #17
0
 def __init__(self, config, bli):
     """Capture config/cache references, open cdb handles, start the loop."""
     self.base_dir = config.base_dir
     self.db_backend = config.db_backend
     self.cache = bli.cache
     self.cdb_cache = dict()
     # Only the "cdb" backend needs per-blacklist handles; "ram" keeps none.
     if self.db_backend == "cdb":
         for blacklist in self.cache:
             self.cdb_cache[blacklist] = cdb.init(blacklist)
     self.loop()
Exemple #18
0
 def update(self, values):
     """Add values to the dictionary."""
     # Rebuild the cdb with the packed key/value pairs from `values`.
     maker = cdb.cdbmake(self.filename, self.filename + ".tmp")
     for i in values:
         # add key,value
         maker.add(self._pack_key(i), self._pack_value(values[i]))
     print "Added %d records to CDB %s (fd %d)" \
         % (maker.numentries, maker.fn, maker.fd)
     maker.finish()
     del(maker)
     # Reopen the freshly written database for reading.
     self.cdb = cdb.init(self.filename)
def get_orig_sentence(sid):
    """Fetch the original sentence for sid from the ORIG_DIR cdb tree."""
    sid = sid.split('%')[0]
    parts = sid.split('-')
    head, tail = parts[0], parts[1]
    # Shards live at <ORIG_DIR>/<head>/<tail[:4]>/<tail[:6]>.cdb
    file_loc = "%s/%s/%s/%s.cdb" % (ORIG_DIR, head, tail[:4], tail[:6])
    F = cdb.init(file_loc)
    return F.get(sid)
 def __init__(self, config, bli):
     """Initialise backend state from config and bli, then run loop()."""
     self.base_dir = config.base_dir
     self.db_backend = config.db_backend
     self.cache = bli.cache
     self.cdb_cache = {}
     if self.db_backend == "ram":
         # Nothing to preload for the in-memory backend.
         pass
     elif self.db_backend == "cdb":
         # Open one constant-db handle per cached blacklist.
         for entry in self.cache:
             self.cdb_cache[entry] = cdb.init(entry)
     self.loop()
 def get(self, searchKey, exhaustive=False):
     """Return value(s) for searchKey from the mapped cdb files.

     With exhaustive=True every cdb is probed in order; otherwise the
     sorted mapping keys are used to select the single covering cdb.
     """
     if exhaustive:
         # Scan every cdb; return the first non-empty result.
         for entry in self.mapping:
             value = cdb.init(entry["cdb"]).getall(searchKey)
             if value:
                 return value
         return None
     # Pick the cdb whose key range covers searchKey, defaulting to the
     # last one when no mapping key exceeds it.
     encoded = searchKey.encode("utf-8")
     nowCDB = self.mapping[-1]["cdb"]
     for i in range(1, len(self.mapping)):
         if encoded < self.mapping[i]["key"]:
             nowCDB = self.mapping[i - 1]["cdb"]
             break
     targetCDB = cdb.init(nowCDB)
     if self.repeated_keys:
         return targetCDB.getall(encoded)
     return targetCDB.get(encoded)
Exemple #22
0
    def test_reducer(self):
        """CDBReducer output, written to disk, must round-trip through cdb."""
        red = CDBReducer()
        output = red(zip('abcde', '12345'))

        fn = mkstemp()[1]
        with open(fn, 'wb') as fo:
            fo.writelines(v for k, v in output)

        db = cdb.init(fn)
        expected = [('a', '1'), ('b', '2'), ('c', '3'), ('d', '4'), ('e', '5')]
        self.assertEqual([(k, db[k]) for k in db.keys()], expected)
        os.remove(fn)
 def __init__(self, dir):
     """Open the six wiki-mapping constant databases found under dir."""
     # "<l" = 32-bit little-endian integer record format.
     self.struct = struct.Struct("<l")

     def _open(name):
         return cdb.init(dir + name)

     self.pageIdFromName = _open("pageIdFromName.cdb")
     self.pageNameFromId = _open("pageNameFromId.cdb")
     self.pageLinksFromId = _open("pageLinksFromId.cdb")
     self.pageProjectsFromId = _open("pageProjectsFromId.cdb")
     self.projectIdFromName = _open("projectIdFromName.cdb")
     self.projectNameFromId = _open("projectNameFromId.cdb")
Exemple #24
0
 def __init__(self, fname, userdict=None):
     """Load a word dictionary from fname.

     A ".cdb" file is opened as a constant database (requires the cdb
     module); anything else is read as tab-separated lines into a plain
     dict.  userdict is accepted but unused.
     """
     if fname.endswith(".cdb"):
         if not cdb:
             raise RuntimeError("cdb is not supported.")
         self.dict = cdb.init(fname)
     else:
         self.dict = {}
         fp = file(fname)
         # Iterating the file object yields the same lines as the
         # original readline() loop; values keep their trailing newline.
         for s in fp:
             f = s.split("\t")
             self.dict[f[0]] = f[1]
     return
Exemple #25
0
 def __init__(self, fname, userdict=None):
   """Load a word dictionary from a .cdb file or a tab-separated text file."""
   if fname.endswith(".cdb"):
     if not cdb:
       raise RuntimeError("cdb is not supported.")
     self.dict = cdb.init(fname)
   else:
     self.dict = {}
     # Plain text: one "key<TAB>value" pair per line (value keeps newline).
     fp = file(fname)
     while True:
       s = fp.readline()
       if not s: break
       f = s.split("\t")
       self.dict[f[0]] = f[1]
   return
Exemple #26
0
    def test_default(self):
        """CDBFactory chunks must respect chunksize and build a valid cdb."""
        proc = CDBFactory()
        self.assertEqual(proc('k1', ['v1']), None)
        self.assertEqual(proc('k2', ['v2', 'v3']), None)
        chunks = proc.close()
        fn = mkstemp()[1]
        with open(fn, 'wb') as fo:
            for chk in chunks:
                # No chunk may exceed the factory's advertised size.
                self.assertTrue(len(chk) <= proc.chunksize)
                fo.write(chk)

        db = cdb.init(fn)
        self.assertEqual([(k, db[k]) for k in db.keys()],
                [('k1', 'v1'), ('k2', 'v2')])
        os.remove(fn)
Exemple #27
0
def phase_1(pkt):
    """Record the vendor of any new client seen in an 802.11 management frame."""
    if pkt.haslayer(Dot11):
        # Management frames only: assoc-req (0), reassoc-req (2), probe-req (4).
        if pkt.type == 0 and pkt.subtype in (0, 2, 4):
            if pkt.addr2 not in clients:
                # First 3 octets of the MAC (8 chars with colons) = vendor OUI.
                vendor_id = pkt.addr2[0:8]
                upper_case = str(vendor_id).upper()

                db_name = "mac_address_db"
                # NOTE(review): cdbmake here creates a fresh database that is
                # discarded without finish() being called, so the following
                # get() reads whatever file already exists (or an empty db) —
                # confirm the db is meant to be pre-built elsewhere.
                db = cdb.cdbmake("../lib/" + db_name, "../lib/"+ db_name + ".tmp")
                del db
                db = cdb.init("../lib/" + db_name)
                match = db.get(upper_case)

                print("{:<6s}{:>13}{:>12s}".format(str(len(clients) + 1), pkt.addr2, match))
                clients.append(pkt.addr2)
                vendors.append(match)
Exemple #28
0
def load_data():
    """Populate the module-level definition/redirect tables from DATA_PATH."""
    global definitions
    global wiktionary_definitions
    global redirect

    fs_path = os.path.join(DATA_PATH, 'simple_wiki_fs.txt')
    for line in open(fs_path):
        term, definition = line.strip().split('\t')
        definitions[lower_string(term)] = definition

    redirect_path = os.path.join(DATA_PATH, 'simple_wiki_redirect.txt')
    for line in open(redirect_path):
        src, dst = lower_string(line).split()
        redirect[key_title(src)] = key_title(dst)

    wiktionary_definitions = cdb.init(os.path.join(DATA_PATH,
                                                   'wiktionary.cdb'))
def replace_word(key, class_num):
    """Return replacement nouns for key restricted to class_num.

    Looks key up in every WORD_REPLACE cdb shard; each stored entry is a
    '|'-separated list of "class-noun1:noun2:..." groups, and each noun
    may carry a '#'-suffixed annotation that is stripped.
    """
    all_noun = []
    for p in WR_POSTFIX:
        WR = "%s%s" % (WORD_REPLACE, p)
        F = cdb.init(WR)
        noun = F.get(key)
        if noun is not None:  # `is not None`, not `!= None`
            all_noun.extend(noun.rstrip().split('|'))
    rtn = []
    for noun in all_noun:
        now_class, nounList = noun.split('-')
        if now_class == class_num:
            # Drop the '#'-suffixed annotation from each candidate noun.
            rtn.extend(n.split('#')[0] for n in nounList.split(':'))
    return rtn
Exemple #30
0
    def test_reducer(self):
        """Round-trip CDBReducer output through an on-disk cdb file."""
        reducer = CDBReducer()
        produced = reducer(zip('abcde', '12345'))

        path = mkstemp()[1]
        handle = open(path, 'wb')
        handle.writelines(value for _, value in produced)
        handle.close()

        db = cdb.init(path)
        self.assertEqual([(k, db[k]) for k in db.keys()], [('a', '1'),
                                                           ('b', '2'),
                                                           ('c', '3'),
                                                           ('d', '4'),
                                                           ('e', '5')])
        os.remove(path)
Exemple #31
0
    def test_default(self):
        """Feed two keys through CDBFactory and verify the resulting cdb."""
        factory = CDBFactory()
        self.assertEqual(factory('k1', ['v1']), None)
        self.assertEqual(factory('k2', ['v2', 'v3']), None)
        produced = factory.close()
        path = mkstemp()[1]
        handle = open(path, 'wb')
        for piece in produced:
            # Every emitted chunk must fit within the configured size.
            self.assertTrue(len(piece) <= factory.chunksize)
            handle.write(piece)
        handle.close()

        db = cdb.init(path)
        self.assertEqual([(k, db[k]) for k in db.keys()], [('k1', 'v1'),
                                                           ('k2', 'v2')])
        os.remove(path)
    def openCDB(self):
        """Open (creating if necessary) the cdb with group-writable perms."""
        # Clear the umask so the explicit modes below take full effect.
        prevmask = os.umask(0)

        if not os.path.exists(self.path):
            os.makedirs(self.path, 02775)  # setgid, group-writable directory
            os.chown(self.path, self.uid, self.gid)

        if not os.path.isfile(self.cdbName):
            # Write an empty cdb so init() below has something to open.
            maker = cdb.cdbmake(self.cdbName, self.cdbName + ".tmp")
            maker.finish()
            del maker
            os.chown(self.cdbName, self.uid, self.gid)
            os.chmod(self.cdbName, 0664)

        os.umask(prevmask)

        self.cdbObject = cdb.init(self.cdbName)
Exemple #33
0
    def command(self):
        "command"
        # Parse options; a readable cdb filename is mandatory.
        self.init()
        filename = self.options.filename
        if filename is None:
            print "\nThe cdb filename is required\n"
            print self.parser.print_help()
            sys.exit(2)

        if not os.path.isfile(filename):
            print "\nThe cdb filename %s does not exist\n" % filename
            print self.parser.print_help()
            sys.exit(2)

        print "*" * 10, 'Dumping: %s' % filename, "*" * 10
        # Dump every record of the database to stdout.
        cdbo = cdb.init(filename)
        cdbdump(cdbo)
Exemple #34
0
    def openCDB(self):
        """Open (creating if necessary) the cdb with group-writable perms."""
        # Clear the umask so the explicit modes below take full effect.
        prevmask = os.umask(0)

        if not os.path.exists(self.path):
            os.makedirs(self.path, 02775)  # setgid, group-writable directory
            os.chown(self.path, self.uid, self.gid)

        if not os.path.isfile(self.cdbName):
            # Write an empty cdb so init() below has something to open.
            maker = cdb.cdbmake(self.cdbName, self.cdbName + ".tmp")
            maker.finish()
            del maker
            os.chown(self.cdbName, self.uid, self.gid)
            os.chmod(self.cdbName, 0664)

        os.umask(prevmask)

        self.cdbObject = cdb.init(self.cdbName)
Exemple #35
0
    def command(self):
        "command"
        # Parse options; a readable cdb filename is mandatory.
        self.init()
        filename = self.options.filename
        if filename is None:
            print "\nThe cdb filename is required\n"
            print self.parser.print_help()
            sys.exit(2)

        if not os.path.isfile(filename):
            print "\nThe cdb filename %s does not exist\n" % filename
            print self.parser.print_help()
            sys.exit(2)

        print "*" * 10, 'Dumping: %s' % filename, "*" * 10
        # Dump every record of the database to stdout.
        cdbo = cdb.init(filename)
        cdbdump(cdbo)
Exemple #36
0
    def _destroy_item_locked(self, item):
        """Remove item from the name db and delete its directory tree."""
        c = cdb.init(self._name_db)
        maker = cdb.cdbmake(self._name_db + '.ndb', self._name_db + '.tmp')
        # Copy every record except the one pointing at this item.
        r = c.each()
        while r:
            i, v = r
            if v != item._fs_item_id:
                maker.add(i, v)
            r = c.each()
        maker.finish()

        # Swap the rebuilt db into place, then drop the item's data dir.
        filesys.rename(self._name_db + '.ndb', self._name_db)
        path = os.path.join(self._path, item._fs_item_id)
        try:
            shutil.rmtree(path)
        except OSError, err:
            raise CouldNotDestroyError("Could not destroy item '%r' [errno: %d]" % (
                item.name, err.errno))
    def sid_to_sentence(self, sid):
        """Return the sentence stored for sid in the sharded sentence cdbs.

        Returns None (after logging to stderr) when the expected cdb
        file does not exist.
        """
        sid = sid.split(':')[-1]

        parts = sid.split('-')
        if parts[0] == "w201103":
            # Old-style ids carry an empty second component.
            if parts[1] == "":
                parts[0] = "w201103.old/%s" % (parts[2])
                parts.pop(1)
            else:
                parts[0] = "w201103/%s" % parts[1]
            parts.pop(1)

        which_cdb = "%s/%s/%s/%s.cdb" % (self.sentence_cdb_dir, parts[0],
                                         "/".join(parts[1][:3]), parts[1][:4])
        if not os.path.isfile(which_cdb):
            sys.stderr.write("cdb file not found for %s.\n" % sid)
            return None

        return cdb.init(which_cdb)[sid]
Exemple #38
0
 def __search_cdb(self, pathname, keys, actions, source):
     """
     Search DJB's constant databases; see <http://cr.yp.to/cdb.html>.
     """
     import cdb
     db = cdb.init(pathname)
     for key in keys:
         if not key:
             continue
         lowered = key.lower()
         if lowered in db:
             value = db[lowered]
             # A non-empty entry for the key is treated as an overriding
             # action specification.
             if value:
                 actions.clear()
                 actions.update(self.__buildactions(value, source))
             return 1
     return 0
Exemple #39
0
 def __search_cdb(self, pathname, keys, actions, source):
     """
     Search DJB's constant databases; see <http://cr.yp.to/cdb.html>.
     """
     import cdb
     cdb = cdb.init(pathname)
     found_match = 0
     for key in keys:
         # Keys are matched case-insensitively; stop at the first hit.
         if key and cdb.has_key(string.lower(key)):
             found_match = 1
             cdb_value = cdb[string.lower(key)]
             # If there is an entry for this key,
             # we consider it an overriding action
             # specification.
             if cdb_value:
                 actions.clear()
                 actions.update(self.__buildactions(cdb_value, source))
             break
     return found_match
Exemple #40
0
def check_oldpw(accountname, oldpw):
    """Verify oldpw for accountname against the passwd.cdb user database.

    Returns '' on success, otherwise a human-readable error string.
    """
    passwd_dbfile = os.path.abspath(home_dir + "/passwd.cdb")
    try:
        db = cdb.init(passwd_dbfile)
    except Exception:  # narrowed from a bare except
        return 'No user database found.'
    try:
        cdb_user_data = db[accountname]
    except Exception:  # narrowed from a bare except
        return 'User not found or password incorrect.'
    # The stored crypt-style hash sits at bytes 6:40 of the record.
    passhash = cdb_user_data[6:40]
    # Hash algorithm is given between first two $ of passhash
    # (here only md5 based BSD password is used).
    hashtype = '1'
    # Salt is given between next two $.
    salt = passhash[3:11]
    # BUG FIX: the original referenced an undefined name `oldpass`
    # (NameError at runtime); the parameter is `oldpw`.
    opensslargs = ['openssl', 'passwd', '-' + hashtype, '-salt', salt, oldpw]
    newhash = check_output(opensslargs).strip()
    if newhash == passhash:
        return ''
    return 'User not found or password incorrect.'
Exemple #41
0
    def _rename_item_locked(self, arg):
        """Rename item to newname by rewriting the name db (caller holds lock).

        Raises ItemAlreadyExistsError when the target name is taken.
        """
        item, newname = arg
        nn = newname.encode('utf-8')
        npath = os.path.join(self._path, item._fs_item_id, 'name')

        c = cdb.init(self._name_db)
        maker = cdb.cdbmake(self._name_db + '.ndb', self._name_db + '.tmp')
        r = c.each()
        while r:
            i, v = r
            if i == nn:
                raise ItemAlreadyExistsError("Target item '%r' already exists!" % newname)
            elif v == item._fs_item_id:
                # This record belongs to the item: re-add it under the new name.
                maker.add(nn, v)
            else:
                maker.add(i, v)
            r = c.each()
        maker.finish()

        # Swap the rebuilt db into place and sync the on-disk 'name' file.
        filesys.rename(self._name_db + '.ndb', self._name_db)
        nf = open(npath, mode='wb')
        nf.write(nn)
        nf.close()
Exemple #42
0
	def __init__(self, pathKB="/work1/t2g-13IAM/13IAM511/extkb"):
		"""Open the tuples cdb and read the corpus total frequency."""
		self.cdbTuples = cdb.init(os.path.join(pathKB, "tuples.simple.cdb"))
		freq_path = os.path.join(pathKB, "tuples.simple.totalfreq.txt")
		self.totalFreq = float(open(freq_path).read())
Exemple #43
0
g_wep			= [re.compile(x.strip()[1:-1]) if x.startswith("/") else re.compile("^%s$" % x.strip())
				for x in open(_myfile("weak-evident-preds.txt")) if not x.startswith("#")]
g_pnp			= [x.strip() for x in open(_myfile("proper-name-preds.txt"))]
g_prnp		= [x.strip() for x in open(_myfile("pronoun-preds.txt"))]
g_mp			= [x.strip() for x in open(_myfile("modality-preds.txt"))]
g_handinc = dict([(x.strip(), 1) for x in open(_myfile("incompatible.txt"))])


#
print >>sys.stderr, "Loading schema..."

g_schema = {}

if os.path.exists(_myfile("schemas-size12.cdb")):
	print >>sys.stderr, "Using cache!"
	g_schema = cdb.init(_myfile("schemas-size12.cdb"))
	
else:
	if "schema" in pa.caching: maker = cdb.cdbmake( _myfile("schemas-size12.cdb"), _myfile("schemas-size12.cdb.tmp") )

	schema_id = 0
	
	for score, events, event_scores, roles in re.findall( "\*\*\*\*\*\nscore=([-0-9.]+)\nEvents: (.*?)\nScores: (.*?)\n(.*?)\n\n", open( _myfile("schemas-size12") ).read(), re.MULTILINE|re.DOTALL ):

		schema_id += 1
		scores_dict = {}

		for i, e in enumerate(events.split()):
			scores_dict[e] = float(event_scores.split()[i])

		role_id = 0
Exemple #44
0
    def write_pairs(self, f1, f2):
        '''Parse through two paired files and only write if both pairs are present'''
        def intersect(a, b):
            '''Intesection between lists'''
            return list(set(a) & set(b))

        def rm_files(patterns):
            '''Remove files using glob given as list of patterns'''

            import glob
            import os

            for p in patterns:
                files = glob.glob(p)
                if len(files) == 0:
                    pass
                else:
                    # NOTE(review): relies on Python 2 map() being eager;
                    # under Python 3 this would be a no-op.
                    map(os.remove, files)

        def write_out(db_common, f, o):
            '''Write out reads'''

            if self.gz:
                fh = open(f, 'r')
                out = gzip.open(o + '.gz', 'wb')
            else:
                fh = open(f, 'r')
                out = open(o, 'w')

            written_count = 0
            total_count = 0
            for (title, sequence, quality) in FastqGeneralIterator(fh):
                total_count += 1
                # Drop the trailing "/1" or "/2" when testing membership.
                if db_common.has_key(title[:-2]):
                    out.write('@%s\n%s\n+\n%s\n' % (title, sequence, quality))
                    written_count += 1
            # NOTE(review): Python 2 integer division — the percentage
            # prints as 0.0 or 100.0 unless an operand is made float.
            sys.stderr.write('%s: Total %i, Written %i (%.1f%%)\n' %
                             (f, total_count, written_count,
                              written_count / total_count * 100))
            fh.close()
            out.close()

        def create_db(f, db_fname):
            '''Write out db of headers'''

            # Every 4th FASTQ line is a header; strip '@' and the '/N' tag.
            fh = open(f, 'r')
            fh_headers = (x.strip()[1:-2] for i, x in enumerate(fh)
                          if not (i % 4))

            db = cdb.cdbmake(db_fname, db_fname + '.tmp')
            for h in fh_headers:
                db.add(h, 'T')
            db.finish()
            del (db)

        ## get headers from both trimmed files ##
        # strip the /2 or /1 and grab only the headers
        # write in dbm to minimze memory usage

        # create db's (parallel)
        rand = ''.join(
            random.choice(string.ascii_uppercase + string.digits)
            for x in range(36))
        db1_fname = 'db1_%s' % rand
        db2_fname = 'db2_%s' % rand

        jobs = []
        p = multiprocessing.Process(target=create_db, args=(
            f1,
            db1_fname,
        ))
        p.start()
        jobs.append(p)

        p = multiprocessing.Process(target=create_db, args=(
            f2,
            db2_fname,
        ))
        p.start()
        jobs.append(p)

        # wait for jobs to finish
        for job in jobs:
            job.join()

        ## get headers that are in both trimmed files ##
        db1 = cdb.init(db1_fname)
        db2 = cdb.init(db2_fname)
        common = intersect(db1.keys(), db2.keys())

        dbcommon_fname = 'dbcommon_%s' % rand
        db_common = cdb.cdbmake(dbcommon_fname, dbcommon_fname + '.tmp')
        for h in common:
            db_common.add(h, 'T')
        db_common.finish()
        del (db_common)

        ## get headers that are in only one trimmed file ##
        symdiff = set(db1.keys()).symmetric_difference(set(db2.keys()))

        dbdiff_fname = 'dbdiff_%s' % rand
        db_diff = cdb.cdbmake(dbdiff_fname, dbdiff_fname + '.tmp')
        for h in symdiff:
            db_diff.add(h, 'T')
        db_diff.finish()
        del (db_diff)

        ## open common db ##
        db_common = cdb.init(dbcommon_fname)
        jobs = []
        p = multiprocessing.Process(target=write_out,
                                    args=(db_common, f1, self.o[0]))
        p.start()
        jobs.append(p)

        p = multiprocessing.Process(target=write_out,
                                    args=(db_common, f2, self.o[1]))
        p.start()
        jobs.append(p)

        ## open single db ##
        self.single = [self.o[0] + '.single', self.o[1] + '.single']

        db_diff = cdb.init(dbdiff_fname)
        p = multiprocessing.Process(target=write_out,
                                    args=(db_diff, f1, self.single[0]))
        p.start()
        jobs.append(p)

        p = multiprocessing.Process(target=write_out,
                                    args=(db_diff, f2, self.single[1]))
        p.start()
        jobs.append(p)

        # wait for jobs to finish
        for job in jobs:
            job.join()

        rm_files([db1_fname, db2_fname, dbcommon_fname, dbdiff_fname, f1, f2])
Exemple #45
0
import cdb

# Dump every key stored in the given network-weights cdb file.
inputfile = "morepork-dropout-3e116ed1-i15-h399-o2-b1-8000Hz-w512.net"
db = cdb.init(inputfile)

for k in db.keys():
    print k
Exemple #46
0
g_wep			= [re.compile(x.strip()[1:-1]) if x.startswith("/") else re.compile("^%s$" % x.strip())
				for x in open(_myfile("../data/weak-evident-preds.txt")) if not x.startswith("#")]
g_pnp			= [x.strip() for x in open(_myfile("../data/proper-name-preds.txt"))]
g_prnp		= [x.strip() for x in open(_myfile("../data/pronoun-preds.txt"))]
g_mp			= [x.strip() for x in open(_myfile("../data/modality-preds.txt"))]
g_handinc = dict([(x.strip(), 1) for x in open(_myfile("../data/incompatible.txt"))])


#
print >>sys.stderr, "Loading schema..."

g_schema = {}

if os.path.exists(_myfile("../data/schemas-size12.cdb")):
	print >>sys.stderr, "Using cache!"
	g_schema = cdb.init(_myfile("../data/schemas-size12.cdb"))
	
else:
	if "schema" in pa.caching: maker = cdb.cdbmake( _myfile("../data/schemas-size12.cdb"), _myfile("../data/schemas-size12.cdb.tmp") )

	schema_id = 0
	
	for score, events, event_scores, roles in re.findall( "\*\*\*\*\*\nscore=([-0-9.]+)\nEvents: (.*?)\nScores: (.*?)\n(.*?)\n\n", open( _myfile("../data/schemas-size12") ).read(), re.MULTILINE|re.DOTALL ):

		schema_id += 1
		scores_dict = {}

		for i, e in enumerate(events.split()):
			scores_dict[e] = float(event_scores.split()[i])

		role_id = 0
Exemple #47
0
 def keys(self):
     """Return all keys, reopening the cdb handle from self.fn on failure."""
     try:
         return self.db.keys()
     except Exception:
         # Narrowed from a bare `except:`; reopen the handle and retry once.
         self.db = cdb.init(self.fn)
         return self.db.keys()
Exemple #48
0
 def __init__(self, cdbname):
     """Open cdbname as the constant database backing this CMap."""
     CMap.__init__(self)
     self.cdbname = cdbname
     # Read-only handle used for all lookups.
     self.db = cdb.init(cdbname)
Exemple #49
0
            try:
                os.mkdir(ipath)
                done = True
            except OSError, err:
                if err.errno != errno.EEXIST:
                    raise
            if cntr > 2 and not done and self._itemspace <= 2 ** 31:
                self._itemspace *= 2
                cntr = 0
            elif cntr > 20:
                # XXX: UnexpectedBackendError() that propagates to user?
                raise Exception('Item space full!')

        nn = item.name.encode('utf-8')

        c = cdb.init(self._name_db)
        maker = cdb.cdbmake(self._name_db + '.ndb', self._name_db + '.tmp')
        r = c.each()
        while r:
            i, v = r
            if i == nn:
                # Oops. This item already exists! Clean up and error out.
                maker.finish()
                os.unlink(self._name_db + '.ndb')
                os.rmdir(ipath)
                if newrev is not None:
                    os.unlink(newrev)
                raise ItemAlreadyExistsError("Item '%r' already exists!" % item.name)
            else:
                maker.add(i, v)
            r = c.each()
Exemple #50
0
 def __init__(self, filename):
     """Open filename as the constant database backing this object."""
     self.db = cdb.init(filename)
Exemple #51
0
def main():
  """Look up records by key and stream them to stdout.

  Reads index keys (one per line) from stdin, resolves each key to one
  or more byte offsets via a CDB index, seeks into the input data file
  and re-emits the matching records through the selected reader/writer
  codec pair.

  Options:
    -R / -W   reader / writer format: 'pb' (protobuf) or 'txt' (percent codec)
    -F        comma-separated field names for the percent codec
    -d        field delimiter for the percent codec (default tab)
    -p / -m   .pb2 descriptor file and message type name
    -k        key field name (required)
    -i        index type; only 'cdb' is supported (required)
    -x        index file path
    -v        increase verbosity

  Returns 0 on success; raises Exception on missing arguments or
  unsupported formats.
  """
  reader_format = 'pb'
  writer_format = 'pb'
  delim = '\t'
  fields = []
  key = None
  typename = ""
  pb2file = None
  indextype = None
  indexreader = None
  indexfile = None
  fin = None
  fout = sys.stdout
  infile = None
  verbose = 0

  opts, args = getopt.getopt(sys.argv[1:], 'R:W:F:d:p:k:i:x:t:m:v')

  for o, a in opts:
    if o == '-R':
      reader_format = a
    elif o == '-W':
      writer_format = a
    elif o == '-F':
      fields = a.split(',')
    elif o == '-d':
      delim = a
    elif o == '-p':
      pb2file = a
    elif o == '-m':
      typename = a
    elif o == '-k':
      key = a
    elif o == '-i':
      indextype = a
    elif o == '-x':
      indexfile = a
    elif o == '-v':
      verbose += 1

  # ``is None`` is the idiomatic (and safe) None test; ``== None`` can be
  # hijacked by a custom __eq__.
  if key is None:
    raise Exception("missing key parameter, specify with -k")

  if not len(args):
    raise Exception("missing input data file argument")

  infile = shift(args)
  # ``open`` instead of the removed-in-Py3 ``file`` builtin.
  fin = open(infile)

  # create the index reader object

  if indextype == 'cdb':

    import cdb
    indexreader = cdb.init( indexfile )

  elif indextype is None:

    raise Exception("missing index type parameter, specify with -i")

  # initialize reader / writer codecs

  if pb2file:
    import lwpb.codec
    pb2codec = lwpb.codec.MessageCodec( pb2file=pb2file, typename=typename )

  if len(fields):
    import percent.codec
    txtcodec = percent.codec.PercentCodec( fields, delim )

  # create the stream reader

  if reader_format == 'pb':
    import lwpb.stream
    reader = lwpb.stream.StreamReader( fin, codec=pb2codec )
  elif reader_format == 'txt':
    import percent.stream
    reader = percent.stream.PercentCodecReader( fin, txtcodec )
  else:
    raise Exception("bad reader format")

  # create the stream writer

  if writer_format == 'pb':
    import lwpb.stream
    writer = lwpb.stream.StreamWriter( fout, codec=pb2codec )
  elif writer_format == 'txt':
    import percent.stream
    writer = percent.stream.PercentCodecWriter( fout, txtcodec )
  else:
    raise Exception("bad writer format")


  # lookup, read, and write records: each stdin line is a key; each key
  # may map to several offsets in the data file (cdb getall).

  for line in sys.stdin:
    indexkey = line.strip('\r\n')

    for indexvalue in indexreader.getall(indexkey):
      offset = long( indexvalue )
      fin.seek( offset, os.SEEK_SET )
      record = reader.read()
      writer.write( record )

  return 0
 def _get_reader(self, **kwargs):
     """Finalize the in-progress cdbmake writer, then reopen read-only."""
     self.db.finish()
     db_path = self.cdb_path.encode('utf-8')
     return cdb.init(db_path, **kwargs)
Exemple #53
0
 def __init__(self, filename):
     """Open the CDB at *filename*, encoding the path to UTF-8 bytes."""
     encoded_path = filename.encode('utf8')
     self.db = cdb.init(encoded_path)
Exemple #54
0
 def __init__(self, filename):
     """Open the constant database at *filename* for reading."""
     self.db = cdb.init(filename)
Exemple #55
0
#!/usr/bin/env python
# check the contents of the PDF url mapping table.
import cdb
db = cdb.init('omega/cdb/pdfurl')
for key in db.keys():
    print key,' ',db.get(key)
Exemple #56
0
 def write_pairs(self, f1, f2):
    '''Parse through two paired files and only write if both pairs are present'''
    
    def intersect(a, b):
       '''Intersection between lists'''
       return list(set(a) & set(b))
    
    def rm_files(patterns):
       '''Remove files using glob given as list of patterns'''
       
       import glob
       import os
       
       for p in patterns:
          files = glob.glob(p)
          if len(files) == 0:
             pass
          else:
             map(os.remove, files)
    
    def write_out(db_common, f, o):
       '''Write out reads'''
       
       if self.gz:
          fh = open(f, 'r')
          out = gzip.open(o+'.gz', 'wb')
       else:
          fh = open(f, 'r')
          out = open(o, 'w')
       
       written_count = 0
       total_count = 0
       for (title, sequence, quality) in FastqGeneralIterator(fh):
          total_count += 1
          if db_common.has_key(title[:-2]):
             out.write('@%s\n%s\n+\n%s\n' % (title, sequence, quality))
             written_count += 1
       # Float math: under Python 2, int/int truncates, so the old
       # expression always reported 0.0% (or 100.0%); also guard against
       # an empty input file (total_count == 0).
       pct = 100.0 * written_count / total_count if total_count else 0.0
       sys.stderr.write('%s: Total %i, Written %i (%.1f%%)\n' % (f, total_count, written_count, pct))
       fh.close()
       out.close()
    
    def create_db(f, db_fname):
       '''Write out db of headers'''
       
       fh = open(f, 'r')
       fh_headers = (x.strip()[1:-2] for i, x in enumerate(fh) if not (i % 4))
       
       db = cdb.cdbmake(db_fname, db_fname + '.tmp')
       for h in fh_headers:
          db.add(h, 'T')
       db.finish()
       del(db)
    
    ## get headers from both trimmed files ##
    # strip the /2 or /1 and grab only the headers
    # write in dbm to minimze memory usage
    
    # create db's (parallel)
    rand = ''.join(random.choice(string.ascii_uppercase + string.digits) for x in range(36))
    db1_fname = 'db1_%s' % rand
    db2_fname = 'db2_%s' % rand
    
    jobs = []
    p = multiprocessing.Process(target=create_db, args=(f1, db1_fname, ))
    p.start()
    jobs.append(p)
    
    p = multiprocessing.Process(target=create_db, args=(f2, db2_fname, ))
    p.start()
    jobs.append(p)
    
    # wait for jobs to finish
    for job in jobs:
       job.join()
    
    ## get headers that are in both trimmed files ##
    db1 = cdb.init(db1_fname)
    db2 = cdb.init(db2_fname)
    common = intersect(db1.keys(), db2.keys())
    
    dbcommon_fname = 'dbcommon_%s' % rand
    db_common = cdb.cdbmake(dbcommon_fname, dbcommon_fname + '.tmp')
    for h in common:
       db_common.add(h, 'T')
    db_common.finish()
    del(db_common)
    
    ## get headers that are in only one trimmed file ##
    symdiff = set(db1.keys()).symmetric_difference(set(db2.keys()))
    
    dbdiff_fname = 'dbdiff_%s' % rand
    db_diff = cdb.cdbmake(dbdiff_fname, dbdiff_fname + '.tmp')
    for h in symdiff:
       db_diff.add(h, 'T')
    db_diff.finish()
    del(db_diff)
    
    
    ## open common db ##
    db_common = cdb.init(dbcommon_fname)
    jobs = []
    p = multiprocessing.Process(target=write_out, args=(db_common, f1, self.o[0]))
    p.start()
    jobs.append(p)
    
    p = multiprocessing.Process(target=write_out, args=(db_common, f2, self.o[1]))
    p.start()
    jobs.append(p)
    
    ## open single db ##
    self.single = [self.o[0]+'.single', self.o[1]+'.single']
    
    db_diff = cdb.init(dbdiff_fname)
    p = multiprocessing.Process(target=write_out, args=(db_diff, f1, self.single[0]))
    p.start()
    jobs.append(p)
    
    p = multiprocessing.Process(target=write_out, args=(db_diff, f2, self.single[1]))
    p.start()
    jobs.append(p)
    
    # wait for jobs to finish
    for job in jobs:
       job.join()
    
    rm_files([db1_fname, db2_fname, dbcommon_fname, dbdiff_fname, f1, f2])
Exemple #57
0
def lookup_by_id(config, eon_id):
    """Return True if *eon_id* is present in the on-disk EON id catalog."""
    map_dir = config.get("broker", "grn_to_eonid_map_location")
    catalog = cdb.init(os.path.join(map_dir, "eon_catalog_by_id.cdb"))
    return catalog.has_key(str(eon_id))
Exemple #58
0
	def __init__(self, path = "/work/naoya-i/kb/"):
		"""Open the word2vec CDB offset index and memory-map the vector binary."""
		index_path = os.path.join(path, "GoogleNews-vectors-negative300.index.cdb")
		vectors_path = os.path.join(path, "GoogleNews-vectors-negative300.bin")
		self.w2vi = cdb.init(index_path)
		self.w2vf = open(vectors_path, "rb")
		self.w2vdb = mmap.mmap(self.w2vf.fileno(), 0, prot=mmap.PROT_READ)