Example #1
    def __init__(self,
                 index,
                 lm,
                 input,
                 input_type,
                 binary_interval,
                 decode_mode=None):
        """Initialise the sentence either from text or from an interval."""

        assert input_type in ("text", "interval")
        assert binary_interval in (None, "sub", "super")

        self.index = index
        self.lm = lm

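        # Encode path: tokenise the text, map tokens to vocabulary indices,
        # then narrow them to an arithmetic-coding interval under the LM.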
        if input_type == "text":
            text = input
            raw_token_strings = text2token_strings(text)
            self.token_strings = normalise_and_explode_tokens(
                raw_token_strings)
            self.token_indices = tuple(map(self.index.s2i, self.token_strings))
            self.interval = encode(lm.conditional_interval, self.token_indices)
            if binary_interval:
                self.bits = interval2bits(self.interval, binary_interval)
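        # Decode path: reconstruct the token sequence from a start interval.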
        elif input_type == "interval":
            assert decode_mode in ("deep", "shallow")
            start_interval = input
            if decode_mode == "deep":
                decoding_result = deep_decode(lm.next,
                                              start_interval,
                                              end=self.index.s2i("_END_"))
            elif decode_mode == "shallow":
                decoding_result = decode(lm.next, start_interval)
            self.token_indices = decoding_result.sequence
            self.token_strings = tuple(map(self.index.i2s, self.token_indices))
            self.interval = decoding_result.interval
            if binary_interval:
                self.bits = interval2bits(self.interval, binary_interval)
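
A plausible usage sketch for the constructor above. The class name "Sentence" is an assumption (the listing shows only the method body), and index and lm stand for the BinDBIndex and BinDBLM objects loaded as in the later examples.

# Hedged sketch: "Sentence" is an assumed class name, not confirmed by the
# listing; index and lm are loaded as in Examples #2-#4.
sentence = Sentence(index, lm, "who do you miss the most?",
                    input_type="text", binary_interval="sub")
print(sentence.token_strings)  # normalised, exploded token strings
print(sentence.interval)       # arithmetic-coding interval for the sentence
print(sentence.bits)           # binary sub-interval representation

# Round trip: rebuild the sentence from its interval via deep decoding.
rebuilt = Sentence(index, lm, sentence.interval,
                   input_type="interval", binary_interval=None,
                   decode_mode="deep")
print(rebuilt.token_strings)
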
Example #2
                    help="normalise and explode tokens")
args = parser.parse_args()

# Load the index
if args.index:
    print_status("Started loading index from", args.index)
    with open(args.index, "r") as f:
        index = bindb.BinDBIndex(f)
    print_status("Finished loading index")

while True:
    try:
        text = input('--> ')
    except KeyboardInterrupt:
        print()
        break

    token_strings = text2token_strings(text)

    if args.normalise:
        token_strings = normalise_and_explode_tokens(token_strings)

    print(" ".join(token_strings))

    if args.index:
        try:
            token_indices = tuple(map(index.s2i, token_strings))
            print(token_indices)
        except KeyError as e:
            print("KeyError: {e} is not in the index".format(**locals()))
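
For orientation, a minimal sketch of the argparse set-up that the truncated first line of this example belongs to. Only --index and --normalise are implied by the usages above; the parser description and the --index help string are assumptions.

import argparse

# Assumed reconstruction of the parser whose last help= line survives above.
parser = argparse.ArgumentParser()
parser.add_argument("--index",
                    help="path to a BinDB index file")  # assumed help text
parser.add_argument("--normalise",
                    action="store_true",
                    help="normalise and explode tokens")
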
Example #3
offset = 0

# Load language model
print("Loading language model...")
lm = bindb.BinDBLM(
    "/Users/kkom/Desktop/bindb-normalised/counts-consistent-tables", n, start,
    end, beta, gamma, offset)

# Load index
print("Loading words index...")
with open("/Users/kkom/Desktop/bindb-normalised/index", "r") as f:
    index = bindb.BinDBIndex(f)

# Invent plaintext
plaintext = "who do you miss the most?"
plaintext_strings = normalise_and_explode_tokens(text2token_strings(plaintext))
plaintext_indices = tuple(map(index.s2i, plaintext_strings))

print("Plaintext: " + plaintext, end="\n\n")
print("Plaintext token strings: " + str(plaintext_strings), end="\n\n")
print("Plaintext token indices: " + str(plaintext_indices), end="\n\n")

# Invent a password
password = "******"
password_strings = normalise_and_explode_tokens(text2token_strings(password))
password_indices = tuple(map(index.s2i, password_strings))

print("Password: " + password, end="\n\n")
print("Password token strings: " + str(password_strings), end="\n\n")
print("Password token indices: " + str(password_indices), end="\n\n")
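
A hedged continuation: given the indices above, the corresponding arithmetic-coding interval and bit representation could be computed with the same encode() and interval2bits() helpers that Example #1 uses; their defining module is not shown in these listings.

# Sketch only: encode() and interval2bits() are taken from Example #1.
plaintext_interval = encode(lm.conditional_interval, plaintext_indices)
plaintext_bits = interval2bits(plaintext_interval, "sub")

print("Plaintext interval: " + str(plaintext_interval), end="\n\n")
print("Plaintext bits: " + str(plaintext_bits), end="\n\n")
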
Example #4
print("gamma: {gamma}".format(**locals()))
print("offset: {offset}".format(**locals()))
print()

# Load language model
lm = bindb.BinDBLM(
    "/Users/kkom/Desktop/bindb-normalised/counts-consistent-tables", n, start,
    end, beta, gamma, offset)

# Load index
with open("/Users/kkom/Desktop/bindb-normalised/index", "r") as f:
    index = bindb.BinDBIndex(f)

# Invent a sentence
text = "Hey!  What the f**k is going on here?"
token_strings = normalise_and_explode_tokens(text2token_strings(text))
token_indices = tuple(map(index.s2i, token_strings))

print(text)
print()
print(" ".join(token_strings))
print()
print(token_indices)
print()

# Get the next token after "is" given some intervals
context = token_indices[:9]

intervals = (create_interval(0, 1, 1000000),
             create_interval(345246, 56, 1000000),
             create_interval(5465477, 322, 10000000),
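
The listing breaks off inside the intervals tuple. As a hedged sketch of the step its comment announces: Example #1 shows that lm.next is the callback consumed by decode(), so once the tuple is closed, each interval could be shallow-decoded and mapped back to token strings with index.i2s. The exact continuation is not shown here.

# Sketch assuming the tuple above is closed; decode() and index.i2s are the
# shallow decoder and reverse lookup seen in Example #1.
for interval in intervals:
    result = decode(lm.next, interval)
    print(tuple(map(index.i2s, result.sequence)))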