Example #1
0
LODStats is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with LODStats.  If not, see <http://www.gnu.org/licenses/>.
"""
import bitarray
from hashlib import md5
from LimitedSizeDict import LimitedSizeDict

# FIXME: does it help to build some small "md5-cache" for the last 1, 2, 3 strings?!

# subjects
# Cache keyed by subject (or by its MD5 digest for subjects longer than 16
# characters). Each stored value is indexed by a num_id slot:
#   0: entities, 1: typed subjects, 2: labeled subjects
# NOTE(review): presumably LimitedSizeDict drops entries once size_limit is
# exceeded -- confirm against the LimitedSizeDict module.
_DISTINCT_SUBJECTS_SIZE_LIMIT = 300000  # FIXME: make limit configurable
distinct_subjects = LimitedSizeDict(size_limit=_DISTINCT_SUBJECTS_SIZE_LIMIT)

def query_distinct_subject(s, num_id):
    """Look up the flag stored for subject *s* in slot *num_id*.

    s      -- the subject string to look up.
    num_id -- index into the per-subject record (per the module comment:
              0: entities, 1: typed subjects, 2: labeled subjects).

    Returns the value stored at ``distinct_subjects[key][num_id]`` when the
    subject is present in the cache, and ``False`` when it is not.
    """
    # Subjects longer than 16 characters are keyed by their 16-byte MD5
    # digest; shorter ones are used as the key directly. This must stay in
    # sync with the key derivation in set_distinct_subject.
    if len(s) > 16:
        s_hash = md5(s).digest()
    else:
        s_hash = s
    # "in" replaces dict.has_key(), which no longer exists in Python 3.
    if s_hash in distinct_subjects:
        return distinct_subjects[s_hash][num_id]
    return False
        
def set_distinct_subject(s, num_id):
    if len(s) > 16:
        s_hash = md5(s).digest()