Example #1
    def second_pass(self, file_lines):
        memory_address = self.MEM_START_ADDR
        for line in file_lines:
            parser = Parser(instruction=line)
            encoder = Encoder(instruction_type=parser.instruction_type)

            if parser.instruction_type == InstructionType.c_instruction:
                hack_line = encoder.encode(dest=parser.dest,
                                           comp=parser.comp,
                                           jump=parser.jump)

            elif parser.instruction_type == InstructionType.a_instruction:
                try:
                    integer_address = int(parser.address)
                except ValueError:
                    if self.symbol_table.get(parser.address) is None:
                        self.symbol_table[parser.address] = memory_address
                        memory_address += 1

                    integer_address = self.symbol_table.get(parser.address)

                hack_line = encoder.encode(address=integer_address)

            else:
                continue

            self.hack_file.write(hack_line + '\r\n')
Example #2
    def _build(self, Log, Scorer):
        if Log is None:
            Log = Logger()
        if Scorer is None:
            Scorer = Score()

        self.Log = Log
        self.Scorer = Scorer
        self.Encoder = Encoder()
        self.GPT = GPT2LanguageModel(model_name=self.model)
Example #3
    def _build(self, mod, Log):
        ''' Builds the application using variables provided by the user '''
        if Log is None:
            # Log = Log() would raise a TypeError on a None parameter;
            # Logger (used in the sibling examples) is presumably intended.
            Log = Logger()

        if self._seed < 1:
            random.seed(time.time())
            self._seed = random.random()

        self.Log = Log
        self.Scorer = Score(mod, self.Log)
        self.Encoder = Encoder(seed=self._seed, probability=self._probability)
        self.GPT = GPT2LanguageModel(model_name=self.model)
Example #4
    def __init__(self, params, embedding_matrix):
        super(RVAE_dilated, self).__init__()

        self.params = params

        self.word_embeddings = nn.Embedding(params.word_vocab_size, params.word_embed_size)
        self.word_embeddings.weight = Parameter(t.from_numpy(embedding_matrix).float(),
                                                requires_grad=False)
        self.encoder = Encoder(self.params)

        self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size)
        self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size)

        self.decoder = Decoder(self.params)
Example #5
    @classmethod  # implied by the cls parameter; the decorator is not shown in the source
    def load_model_from_package(cls, package):
        encoder = Encoder(package['d_input'],
                          package['n_layers_enc'],
                          package['n_head'],
                          package['d_k'],
                          package['d_v'],
                          package['d_model'],
                          package['d_inner'],
                          dropout=package['dropout'],
                          pe_maxlen=package['pe_maxlen'])
        decoder = Decoder(
            package['sos_id'],
            package['eos_id'],
            package['vocab_size'],
            package['d_word_vec'],
            package['n_layers_dec'],
            package['n_head'],
            package['d_k'],
            package['d_v'],
            package['d_model'],
            package['d_inner'],
            dropout=package['dropout'],
            tgt_emb_prj_weight_sharing=package['tgt_emb_prj_weight_sharing'],
            pe_maxlen=package['pe_maxlen'],
        )
        model = cls(encoder, decoder)
        model.load_state_dict(package['state_dict'])
        LFR_m, LFR_n = package['LFR_m'], package['LFR_n']
        return model, LFR_m, LFR_n
Example #6
def main():
    # Load a dictionary mapping Michael's quotes to their season and episode
    print("Attempting to load quotes from file")
    quotes = load_quotes()
    if quotes is None:
        print("Scraping the web for new quotes")
        quotes = scrape()

    print("Creating sentence encoder")
    encoder = Encoder()

    print("Attempting to load quote embeddings from file")
    quote_embeddings = load_quote_embeddings()
    if quote_embeddings is None:
        print("Generating new quote embeddings")
        quote_embeddings = generate_quote_embeddings(encoder, quotes)
        print("Saving new quote embeddings to {0}".format(embeddings_file))
        save_pickle(quote_embeddings, embeddings_file)

    print("Creating predictor")
    predictor = Predictor(encoder, quote_embeddings)

    while True:
        input_sentence = query_input()
        prediction = predictor.predict_output(input_sentence)
        output_quote = prediction[0]
        output_season = prediction[1]['season']
        output_episode = prediction[1]['episode']
        print("Michael says: \"{0}\" in season {1}, episode {2}".format(
            output_quote, output_season, output_episode))
Example #7
def make_model(src_vocab,
               tgt_vocab,
               N=6,
               d_model=512,
               d_ff=2048,
               h=8,
               dropout=0.1):
    "Helper: Construct a model from hyperparameters."
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    model = EncoderDecoder(
        Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
        Decoder(DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N),
        nn.Sequential(Embeddings(d_model, src_vocab), c(position)),
        nn.Sequential(Embeddings(d_model, tgt_vocab), c(position)),
        Generator(d_model, tgt_vocab))

    # Initialize parameters with Glorot / fan_avg, as the reference code
    # notes is important.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    return model
Example #8
def train_model(dpath, ppath, epoch, version):
    if dpath.endswith(".csv"):
        d = pd.read_csv(dpath)
    else:
        raise ValueError("data format is not supported")

    pipe = joblib.load(ppath)
    encoder = Encoder(pipe)
    x = encoder.encode(d.iloc[:, 1:-1])

    m = create_model([x.shape[1]])
    m.fit(x, d.iloc[:, -1], batch_size=1000, epochs=epoch)
    m.save(f"model/{version}")
Example #9
def run_ensemble(train_df):
    encoder = Encoder(train_df)
    encoder.transform(train_df)
    estimators = []
    scores = []
    labels = []
    nums = list(range(1, 5, 1)) + list(range(5, 60, 5)) + list(
        range(60, 100, 10)) + list(range(100, 500, 50))
    for n in nums:
        lr = modelDict["GBM"](n_estimators=n)
        n_train_df = pd.get_dummies(train_df)
        train_score, val_score = lr.train(n_train_df)
        scores += [train_score, val_score]
        estimators += [n, n]
        labels += ['train', 'val']
    return scores, labels, estimators
Example #10
def run_tests():
    e = Encoder()
    d = Decoder(coding_polynomial=e.coding_polynomial,
                k=e.k,
                t=e.r,
                gf_index=e.gf.index)

    message = "zaqwsxcderfvbgtyhnmjuik,ol.p;/zaqwsxedcrfvtgbyhnujmzaqwsxcderf"
    codeword = e.encode(message)
    decoded_message = d.decode(codeword, 'basic')

    for i in range(2, 28):
        codeword.elements[i] = codeword.elements[i].multiplicative_inversion()
    print('26 errors occurred...')
    print(codeword)
    decoded_message = d.decode(codeword, 'basic')
    print('Decoded message: ' + decoded_message[:len(message)])
Example #11
def test_integration(model_service, xy):
    p = Path(curdir / "saved_model")
    assert "http://127.0.0.1:8501" == model_service

    fname = curdir / ".tmp.joblib"
    train_sk_pipe(fname, xy[0])
    assert os.path.exists(fname)
    pipe = load(fname)

    encoder = Encoder(pipe)
    matrices = encoder.encode(xy[0][:100]).tolist()

    res = requests.post(
        model_service + "/v1/models/tp_pred:predict",
        data=json.dumps({"instances": matrices}),
    )

    assert len(res.json()["predictions"]) == 100
Example #12
def run_example_program():
    e = Encoder()
    d = Decoder(coding_polynomial=e.coding_polynomial,
                k=e.k,
                t=e.r,
                gf_index=e.gf.index)

    message = "zaqwsxcderfvbgtyhnmjuik,ol.p;/zaqwsxedcrf"
    print('Message: ' + message)
    codeword = e.encode(message)
    print('Codeword: ' + str(codeword))
    decoded_message = d.decode(codeword, 'basic')
    print('Decoded message: ' + decoded_message[:len(message)])

    for i in range(1, 28):
        codeword.elements[i] = codeword.elements[i].multiplicative_inversion()
    print('27 errors occurred...')
    print(codeword)
    decoded_message = d.decode(codeword, 'basic')
    print('Decoded message: ' + decoded_message[:len(message)])
Example #13
def test_encoder_fix_errors(ii, k, test_type, message):
    e = Encoder()
    d = Decoder(coding_polynomial=e.coding_polynomial, k=e.k, t=e.r, gf_index=e.gf.index)
    encoded_message = e.encode(message)

    if test_type == 'multiple':
        random_indexes = random.sample(range(0, len(encoded_message)), k)
    else:
        random_start = random.randint(0, len(encoded_message)-k-1)
        random_indexes = [i for i in range(random_start, random_start + k)]
    # print("{}): {}".format(k, random_indexes))
    for i in random_indexes:
        encoded_message.elements[i] = encoded_message.elements[i].multiplicative_inversion()
    try:
        start = time.time()
        decoded_message = d.decode(encoded_message, 'basic')
        stop = time.time()
        passed.write("{}, {}, {}, {}, {}\n".format(k, test_type, message, random_indexes, stop - start))
    except CannotDetectErrorException:
        failed.write("{}, {}, {}, {}\n".format(k, test_type, message, random_indexes))
        assert False, "decoder raised CannotDetectErrorException"
    assert message in decoded_message
Example #14
def run(train_df, test_df):
    encoder = Encoder(train_df)
    lr = modelDict["GBM"](need_scale=False)
    encoder.transform(train_df)
    n_train_df = pd.get_dummies(train_df)
    lr.train(n_train_df)
    encoder.transform(test_df)
    n_test_df = pd.get_dummies(test_df)
    y = lr.test(n_test_df)
    save(test_df, y, encoder)
Example #15
    def __init__(self):
        self.leftFEncoder = Encoder()
        self.leftMEncoder = Encoder()
        self.leftREncoder = Encoder()
        self.rightFEncoder = Encoder()
        self.rightMEncoder = Encoder()
        self.rightREncoder = Encoder()
        self.pose = Pose()
        self.lastTime = 0
Example #16
class odometry_node:
    def __init__(self):
        self.leftFEncoder = Encoder()
        self.leftMEncoder = Encoder()
        self.leftREncoder = Encoder()
        self.rightFEncoder = Encoder()
        self.rightMEncoder = Encoder()
        self.rightREncoder = Encoder()
        self.pose = Pose()
        self.lastTime = 0

    def setTime(self, newTime):
        self.lastTime = newTime

    def main(self):
        # Initialize the node before creating publishers or reading parameters.
        rospy.init_node('odometry_node')
        self.nodeName = rospy.get_name()
        rospy.loginfo("{0} started".format(self.nodeName))

        self.odomPub = rospy.Publisher('odometry/filtered',
                                       Odometry,
                                       queue_size=10)
        self.tfPub = TransformBroadcaster()

        self.ticksPerMeter = int(rospy.get_param('~ticks_per_meter', 780))
        self.wheelSeparation = float(rospy.get_param('~wheel_separation', 0.7))
        self.rate = float(rospy.get_param('~rate', 10.0))
        self.baseFrameID = rospy.get_param('~base_frame_id', 'base_footprint')
        self.odomFrameID = rospy.get_param('~odom_frame_id', 'odom')
        self.encoderMin = int(rospy.get_param('~encoder_min', -32768))
        self.encoderMax = int(rospy.get_param('~encoder_max', 32767))

        self.setTime(rospy.get_time())
        rospy.Subscriber("encoder_counts",
                         EncoderCounts,
                         callback=self.callback)
        rospy.spin()

    def callback(self, data):
        self.calculate_pose(data)

    def calculate_pose(self, data):
        lc = data.left_wheel_counts   # wheel counts, ordered front to back
        rc = data.right_wheel_counts  # same ordering on the right side
        # Update the left-side encoders.
        self.leftFEncoder.update(lc[0])
        self.leftMEncoder.update(lc[1])
        self.leftREncoder.update(lc[2])
        # Update the right-side encoders.
        self.rightFEncoder.update(rc[0])
        self.rightMEncoder.update(rc[1])
        self.rightREncoder.update(rc[2])
        # Convert encoder deltas to distance traveled (meters).
        leftFTravel = self.leftFEncoder.getDelta() / self.ticksPerMeter
        leftMTravel = self.leftMEncoder.getDelta() / self.ticksPerMeter
        leftRTravel = self.leftREncoder.getDelta() / self.ticksPerMeter
        rightFTravel = self.rightFEncoder.getDelta() / self.ticksPerMeter
        rightMTravel = self.rightMEncoder.getDelta() / self.ticksPerMeter
        rightRTravel = self.rightREncoder.getDelta() / self.ticksPerMeter
        rospy.loginfo(leftFTravel)
        rospy.loginfo(rightFTravel)
        # Elapsed time since the last update.
        newTime = rospy.get_time()
        deltaTime = newTime - self.lastTime
        self.setTime(newTime)
        # Average travel per side; the left counts arrive sign-flipped
        # relative to the right, hence the negations in the original math.
        aveLT = (leftFTravel + leftMTravel + leftRTravel) / 3
        aveRT = (rightFTravel + rightMTravel + rightRTravel) / 3
        rospy.loginfo(aveLT)
        rospy.loginfo(aveRT)
        midTravel = (aveRT - aveLT) / 2
        midTheta = (aveRT + aveLT) / self.wheelSeparation
        if aveLT == -aveRT:
            # Straight-line motion: heading does not change.
            deltaX = aveLT * cos(self.pose.theta)
            deltaY = aveLT * sin(self.pose.theta)
        else:
            radius = midTravel / midTheta
            # Find the instantaneous center of curvature (ICC) offset.
            iccDx = radius * sin(self.pose.theta)
            iccDy = -radius * cos(self.pose.theta)

            deltaX = cos(midTheta) * iccDx + sin(midTheta) * iccDy - iccDx
            deltaY = sin(midTheta) * iccDx + cos(midTheta) * iccDy - iccDy

        # Update the pose estimate.
        self.pose.x += deltaX
        self.pose.y += deltaY
        self.pose.theta = (self.pose.theta + midTheta) % (2 * pi)
        self.pose.xVel = midTravel / deltaTime if deltaTime > 0 else 0.
        self.pose.yVel = 0
        self.pose.thetaVel = midTheta / deltaTime if deltaTime > 0 else 0.

        self.lastTime = newTime

        now = rospy.get_rostime()

        q = Quaternion()
        q.x = 0
        q.y = 0
        q.z = sin(self.pose.theta / 2)
        q.w = cos(self.pose.theta / 2)
        self.tfPub.sendTransform((self.pose.x, self.pose.y, 0),
                                 (q.x, q.y, q.z, q.w), now, self.baseFrameID,
                                 self.odomFrameID)

        odom = Odometry()
        odom.header.stamp = now
        odom.header.frame_id = self.odomFrameID
        odom.child_frame_id = self.baseFrameID
        odom.pose.pose.position.x = self.pose.x
        odom.pose.pose.position.y = self.pose.y
        odom.pose.pose.position.z = 0
        odom.pose.pose.orientation = q
        odom.pose.covariance[0] = 1e-3
        odom.pose.covariance[7] = 1e-3
        odom.pose.covariance[14] = 1e-3
        odom.pose.covariance[21] = 100000
        odom.pose.covariance[28] = 100000
        odom.pose.covariance[35] = 1e-3

        odom.twist.twist.linear.x = self.pose.xVel
        odom.twist.twist.linear.y = 0
        odom.twist.twist.angular.z = self.pose.thetaVel
        odom.twist.covariance[0] = 1e-3
        odom.twist.covariance[7] = 1e-3
        odom.twist.covariance[14] = 100000
        odom.twist.covariance[21] = 100000
        odom.twist.covariance[28] = 100000
        odom.twist.covariance[35] = 1e-3
        self.odomPub.publish(odom)

    def __getitem__(self, item):
        return getattr(self, item)
Example #17
    def test_minimal(self):
        num_words, num_tags, num_chars = 10, 10, 100
        encoder = Encoder(num_words, num_tags, num_chars=num_chars)

        assert encoder.num_tags == num_tags
        assert isinstance(encoder.word_embedding, nn.Embedding)
Example #18
def encoder():
    e = Encoder()
    return e
Example #19
def main(args):
    # load dictionary and generate char_list, sos_id, eos_id
    char_list, sos_id, eos_id = process_dict(args.dict)
    vocab_size = len(char_list)
    tr_dataset = AudioDataset('train', args.batch_size)
    cv_dataset = AudioDataset('dev', args.batch_size)

    tr_loader = AudioDataLoader(tr_dataset,
                                batch_size=1,
                                num_workers=args.num_workers,
                                shuffle=args.shuffle,
                                feature_dim=args.feature_dim,
                                char_list=char_list,
                                path_list=tr_dataset.path_lst,
                                label_list=tr_dataset.han_lst,
                                LFR_m=args.LFR_m,
                                LFR_n=args.LFR_n)
    cv_loader = AudioDataLoader(cv_dataset,
                                batch_size=1,
                                num_workers=args.num_workers,
                                feature_dim=args.feature_dim,
                                char_list=char_list,
                                path_list=cv_dataset.path_lst,
                                label_list=cv_dataset.han_lst,
                                LFR_m=args.LFR_m,
                                LFR_n=args.LFR_n)

    data = {'tr_loader': tr_loader, 'cv_loader': cv_loader}

    encoder = Encoder(args.d_input * args.LFR_m,
                      args.d_low_dim,
                      args.n_layers_enc,
                      args.n_head,
                      args.d_k,
                      args.d_v,
                      args.d_model,
                      args.d_inner,
                      dropout=args.dropout,
                      pe_maxlen=args.pe_maxlen)
    decoder = Decoder(
        sos_id,
        eos_id,
        vocab_size,
        args.d_word_vec,
        args.n_layers_dec,
        args.n_head,
        args.d_k,
        args.d_v,
        args.d_model,
        args.d_inner,
        dropout=args.dropout,
        tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing,
        pe_maxlen=args.pe_maxlen)
    model = Transformer(encoder, decoder)
    print(model)
    model.cuda()
    # optimizer
    optimizer = TransformerOptimizer(
        torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09),
        args.init_lr, args.d_model, args.warmup_steps)

    # solver
    solver = Solver(data, model, optimizer, args)
    solver.train()
Example #20
word_map['<unk>'] = len(word_map) + 1
word_map['<start>'] = len(word_map) + 1
word_map['<end>'] = len(word_map) + 1
word_map['<pad>'] = 0


checkpoint = '../input/image-copy-2/checkpoint_copy.pt'

decoder = DecoderWithAttention(embed_dim=emb_dim,
                               decoder_dim=decoder_dim,
                               vocab_size=len(word_map),
                               dropout=dropout)
decoder_optimizer = torch.optim.Adam(params=filter(lambda p: p.requires_grad, decoder.parameters()),
                                     lr=decoder_lr)

encoder = Encoder()


# Move to GPU, if available
decoder = decoder.to(device)
encoder = encoder.to(device)

decoder.eval()
encoder.eval()
# Note: scipy.misc.imread/imresize were removed from modern SciPy;
# imageio.imread and PIL-based resizing are the usual replacements.
from scipy.misc import imread, imresize


if checkpoint is not None:
    checkpoint = torch.load(checkpoint)
    decoder.load_state_dict(checkpoint['decoder_state_dict'])
    decoder_optimizer.load_state_dict(checkpoint['decoder_optimizer_dict'])
Example #21
from termcolor import colored
from src.encoder import Encoder
from src.universal_function import universalFunction


def present():
    print(colored('For following instructions:', 'green'))
    print(open('instructions.txt', "r").read())
    print()
    print(colored('We have this LL(1) grammar:', 'green'))
    print(open('grammar.txt', "r").read())
    print()
    print(colored('Parse table of this grammar is:', 'green'))
    print(open('parse-table.txt', "r").read())


if __name__ == "__main__":
    present()
    # filePath = 'data/in/book-example.txt'
    filePath = 'data/in/test.txt'
    fileString = open(filePath, "r").read()
    print(colored('input file:', 'green'))
    print(fileString)
    encoder = Encoder(fileString)
    instructions = encoder.encodeLines()
    programCode = encoder.calcuateProgramCode()
    inputValues = encoder.getInputVaules()
    input_for_universal_program = inputValues + [programCode]
    universalFunction(input_for_universal_program, instructions)
Example #22
class pyReadability():
    '''
        Main class for the application. Loads the data, encodes it, and runs the scoring algorithm.
    '''
    def __init__(self,
                 model,
                 interact,
                 topK,
                 seed,
                 mod,
                 probability,
                 Log=None):
        ''' Takes in variables and starts the program. See the readme for inputs '''
        self.model = model
        self.topK = topK
        self.interact = interact
        self._seed = seed
        self._probability = probability
        self._score = -1

        self._build(mod, Log)

    def _build(self, mod, Log):
        ''' Builds the application using variables provided by the user '''
        if Log is None:
            # Log = Log() would raise a TypeError on a None parameter;
            # Logger (used in the sibling examples) is presumably intended.
            Log = Logger()

        if self._seed < 1:
            random.seed(time.time())
            self._seed = random.random()

        self.Log = Log
        self.Scorer = Score(mod, self.Log)
        self.Encoder = Encoder(seed=self._seed, probability=self._probability)
        self.GPT = GPT2LanguageModel(model_name=self.model)

    def _run(self, text):
        ''' Runs GPT-2 and stores the top-k results '''
        logits = self.GPT.predict(text, "")
        probabilities = torch.nn.functional.softmax(logits, dim=-1)

        best_indices = logits.topk(self.topK)[1]
        self.best_words = [self.GPT[idx.item()] for idx in best_indices]
        self.best_probabilities = probabilities[best_indices].tolist()

    def _getWords(self):
        ''' returns Top-K Words from GPT-2 '''
        return self.best_words

    def _getPropability(self):
        ''' returns top-k probabilities from GPT-2 '''
        return [round(p * 100, 2) for p in self.best_probabilities]

    def _process(self, text, guess):
        ''' scores the input text and logs the result '''

        self._run(text)
        outputLst = self._output()
        self.Log.Trace("Answer List : {}".format(outputLst))

        score = self.Scorer.score(outputLst, guess)
        self.Log.Trace(score)

        self.Log.Info("Score of \'{}\': {}".format(score[0], score[1]))

    def start(self, text=""):
        ''' 
            starts program

            text = Text to be inputted
        '''

        if text == "" and not self.interact:
            raise EnvironmentError(
                "Please input valid text or use the --interact flag")

        if text != "":
            encoded = self.Encoder.encode(text=text)
            for item in encoded:
                if item[0] == '':
                    continue
                self._process(item[0], item[1])

        # Code for Manual Input, meant for debugging not for production use
        else:
            while self.interact:
                text = self.Log.Input("Input Text >> ")

                if text == "":
                    self.Log.Info("Please provide a valid input")
                    continue

                if text == "#?":
                    self.Log.Info(
                        "Available Commands: \n#?: Shows available commands\n#end: Ends Execution"
                    )
                    continue

                if text == "#end":
                    self.Log.Info("Ending Program")
                    break

                guess = self.Log.Input("What will the next word be >> ")
                self._process(text, guess)

        self._score = self.Scorer.calcScore()
        self.Log.Info("Normalized Score: {} | Unnormalized Score: {}".format(
            self.getNormScore(), self.getUnNormScore()))

    def getNormScore(self):
        ''' returns the normalized score '''
        return self._score[0]

    def getUnNormScore(self):
        ''' returns the unnormalized score '''
        return self._score[1]

    def getSeed(self):
        ''' returns the seed used '''
        return self._seed

    def getEncoder(self):
        ''' returns the encoder object '''
        return self.Encoder

    def _output(self):
        ''' returns top-k words and probabilities '''
        return [(self._getWords()[i], self._getPropability()[i])
                for i in range(self.topK)]
Example #23
class Guesser():
    def __init__(self,
                 model="gpt2",
                 interact=False,
                 score=False,
                 topK=10,
                 Log=None,
                 Scorer=None):
        self.model = model
        self.topK = topK
        self._build(Log, Scorer)

    def _build(self, Log, Scorer):
        if Log is None:
            Log = Logger()
        if Scorer is None:
            Scorer = Score()

        self.Log = Log
        self.Scorer = Scorer
        self.Encoder = Encoder()
        self.GPT = GPT2LanguageModel(model_name=self.model)

    def _getBestWords(self, text):
        ''' Finds the best words and calculates their probability of occurring '''
        logits = self.GPT.predict(text, "")

        _, best_indices = logits.topk(self.topK)

        # convert the best-index list into a list of words
        best_words = [self.GPT[idx.item()] for idx in best_indices]

        # calculate probabilities over the full vocabulary
        probabilities = torch.nn.functional.softmax(logits, dim=-1)

        # build a list of probabilities for best_indices, parallel to best_words
        best_probabilities = self._getPropability(
            probabilities[best_indices].tolist())

        # return a list of tuples: the word in position 0, its probability in position 1
        return [(best_words[i], best_probabilities[i])
                for i in range(self.topK)]

    def _getPropability(self, probabilities):
        ''' returns top-k probabilities from GPT-2 '''
        return [round(p * 100, 2) for p in probabilities]

    def _run(self, text, guess):
        ''' scores the input text and logs the result '''

        # gives a list of the best words that GPT predicts
        guessList = self._getBestWords(text)
        self.Log.Info("Answer List : {}".format(guessList))

        # score the guessed word against the list of predictions
        score = self.Scorer.score(guessList, guess)
        self.Log.Info(score)

    def start(self, text=""):
        '''
        Starts the program.

        text: text to be fed into GPT-2; if left blank, interactive mode starts
        '''

        if text != "":
            encoded_text = self.Encoder.encode(text=text)
            for text, guess in encoded_text[0]:
                if text == '':
                    continue
                self._run(text, guess)

        else:
            while True:
                text = self.Log.Input("Input Text >> ")

                if text == "":
                    self.Log.Info("Please provide a valid input")
                    continue

                if text == "#?":
                    self.Log.Info(
                        "Available Commands: \n#?: Shows available commands\n#end: Ends Execution"
                    )
                    continue

                if text == "#end":
                    self.Log.Info("Ending Program")
                    break

                guess = self.Log.Input("What will the next word be >> ")
                self._run(text, guess)

        self.Log.Info("Score: {}".format(self.Scorer.calcScore()))
Example #24
def save(data: pd.DataFrame, y, encoder: Encoder, save_path=out_path):
    data['charges'] = y
    if encoder:
        encoder.rev_transform(data)
    data.to_csv(save_path, sep=',', index=False)
Example #25
class RVAE_dilated(nn.Module):
    def __init__(self, params, embedding_matrix):
        super(RVAE_dilated, self).__init__()

        self.params = params

        self.word_embeddings = nn.Embedding(params.word_vocab_size, params.word_embed_size)
        self.word_embeddings.weight = Parameter(t.from_numpy(embedding_matrix).float(),
                                                requires_grad=False)
        self.encoder = Encoder(self.params)

        self.context_to_mu = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size)
        self.context_to_logvar = nn.Linear(self.params.encoder_rnn_size * 2, self.params.latent_variable_size)

        self.decoder = Decoder(self.params)

    def forward(self, drop_prob,
                encoder_input_tuple=None,
                use_cuda=False,
                z=None,
                inference=False):
        """
        :param encoder_input_tuple: An tensor with shape of [batch_size, seq_len] of Long type

        :param drop_prob: probability of an element of decoder input to be zeroed in sense of dropout

        :param use_cuda: whether to use gpu


        :param z: context if sampling is performing

        :return: unnormalized logits of sentence words distribution probabilities
                    with shape of [batch_size, seq_len, word_vocab_size]
                 kld loss estimation
        """
        encoder_input, lengths = encoder_input_tuple
        batch_size = encoder_input.size()[0]
        # volatile= is pre-0.4 PyTorch; on modern versions use torch.no_grad() instead.
        encoder_input = Variable(encoder_input, volatile=inference).cuda() if use_cuda else Variable(encoder_input, volatile=inference)
        encoder_input_matrix = self.word_embeddings(encoder_input)

        if z is None:
            ''' Get context from encoder and sample z ~ N(mu, std)
            '''
            packed_seq = pack_padded_sequence(encoder_input_matrix, lengths, batch_first=True)
            context = self.encoder.forward(packed_seq, batch_size)

            mu = self.context_to_mu(context)
            logvar = self.context_to_logvar(context)
            std = t.exp(0.5 * logvar)

            z = Variable(t.randn([batch_size, self.params.latent_variable_size]))
            if use_cuda:
                z = z.cuda()

            z = z * std + mu
            kld = (-0.5 * t.sum(logvar - t.pow(mu, 2) - t.exp(logvar) + 1, 1)).mean()
        else:
            kld = None

        decoder_input = encoder_input_matrix
        out = self.decoder.forward(decoder_input, z, drop_prob)

        return encoder_input, out, kld

    def learnable_parameters(self):

        # word_embeddings is a frozen parameter, so it is excluded from the optimizer's parameter list
        return [p for p in self.parameters() if p.requires_grad]

    def trainer(self, optimizer, use_cuda, dropout):

        def train_one_batch(data_tuple):
            target, logits, kld = self.forward(drop_prob=dropout,
                                               encoder_input_tuple=data_tuple,
                                               use_cuda=use_cuda,
                                               z=None)

            batch_size = target.data.size()[0]
            sequence_length = target.data.size()[1]

            logits = logits.view(-1, self.params.word_vocab_size)
            target = target.view(-1)
            cross_entropy = F.cross_entropy(logits, target)
            loss = sequence_length * cross_entropy + kld

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            return kld, loss

        return train_one_batch


    def validater(self, use_cuda, dropout):
        def validate(data_tuple):
            target, logits, kld = self.forward(drop_prob=dropout,
                                               encoder_input_tuple=data_tuple,
                                               use_cuda=use_cuda,
                                               z=None)
            logits = logits.view(-1, self.params.word_vocab_size)
            target = target.view(-1)
            cross_entropy = F.cross_entropy(logits, target)
            return np.exp2(cross_entropy.data.cpu().numpy()[0])

        return validate
Example #26
def args():
    """Parse command-line arguments.

    :return: the parsed arguments (a ``path`` to the .csv data file)
    """
    parser = ArgumentParser()
    parser.add_argument("path", help="path to .csv data file")
    return parser.parse_args()

if __name__ == '__main__':
    args = args()
    DATA_PATH = args.path

    seed = 42
    np.random.seed(seed)

    harvester = DataHarvester(DATA_PATH)
    harvester.read_file()
    harvester.cut_lines()

    encoder = Encoder(harvester.read_data)
    encoder.encode_data()
    encoder.encode_label()

    X = encoder.encoded
    Y = encoder.encoded_label

    model_builder = ModelBuilder(encoder.num_of_label_classes, encoder.num_of_data_classes)
    estimator = KerasClassifier(build_fn=model_builder, epochs=20, batch_size=5, verbose=5)
    kfold = KFold(n_splits=30, shuffle=True, random_state=seed)

    results = cross_val_score(estimator, X, Y, cv=kfold)
    print("Baseline: %.2f%% (%.2f%%)" % (results.mean() * 100, results.std() * 100))
Example #27
    def __init__(self, hidden_encoder_size, z_dim, hidden_decoder_size, output_size, rnn_type, device):
        super(GrammarVAE, self).__init__()
        self.encoder = Encoder(hidden_encoder_size, z_dim)
        self.decoder = Decoder(z_dim, hidden_decoder_size, output_size, device, rnn_type)
        self.device = device