Ejemplo n.º 1
0
def main(argv):
	"""Parse the command line and run a batch encode.

	Builds an ``Encoder`` from the parsed ``-p``/``-i``/``-o``/``-c`` options,
	queues the files returned by ``helper.getFilesExt`` for the given path and
	input container, and calls ``encodeBatch``. Any exception raised while
	doing so is printed rather than propagated.

	Args:
		argv: Accepted for interface compatibility but not consulted;
			``parse_args()`` is called without arguments and therefore reads
			``sys.argv`` itself.
	"""
	parser = argparse.ArgumentParser(description=' Batch Movie Encoder. Converts from one container to another with specified codec for all files in the specified Destination path.',
	# The original epilog read "only a copy will not be done", which inverted
	# the intended meaning.
	epilog='If the codec specified is the same as the codec in the source container only a copy will be done and no transcoding to speed up the process.')

	parser.add_argument('-p',help='Destination path',required=True)

	parser.add_argument('-i',choices=Encoder.SUPPORTEDINPUTCONTAINER,
	help='The input movie container format. Generally the same as the file extension. Eg .avi',required=True)

	parser.add_argument('-o',choices=Encoder.SUPPORTEDTARGETCONTAINER,
	help='The output movie container format.',required=True)

	parser.add_argument('-c',choices=Encoder.SUPPORTEDTARGETVIDEOFORMATS,
	help='The video codec to encode with.',required=False,default='H264')

	# -r and -d are described as switches, so allow them without a value
	# (const=True) while still accepting the old "-r VALUE" spelling.
	# NOTE(review): neither option is read anywhere in this function.
	parser.add_argument('-r',nargs='?',const=True,help='Recurse subdirectories.')
	parser.add_argument('-d',nargs='?',const=True,help='Delete source file after conversion completion.')

	EncoderOptions= parser.parse_args()

	try:
		movieEncoder=Encoder(EncoderOptions.p,EncoderOptions.i,EncoderOptions.o,EncoderOptions.c)
		movieEncoder.queueFiles(helper.getFilesExt(EncoderOptions.p,EncoderOptions.i))
		movieEncoder.encodeBatch()
	except Exception as e:
		# Top-level boundary: report the failure instead of a traceback.
		# print(e) with a single argument behaves the same on Python 2 and 3.
		print(e)
Ejemplo n.º 2
0
 def encode(self):
     """Encode the input file and write the result plus its mappings.

     Reads ``self.args.input`` as text, runs it through ``Encoder``, writes
     the encoded content to ``self.args.output`` (binary mode) and the
     reversed mappings to ``self.args.mappings`` as compact JSON.
     """
     # Use a context manager so the input handle is closed deterministically
     # (the original left it open until garbage collection).
     with open(self.args.input, 'r') as f:
         raw_content = f.read()

     encoder = Encoder(raw_content)
     encoded_content, reversed_mappings = encoder.encode()

     with open(self.args.output, 'wb') as f:
         f.write(encoded_content)

     with open(self.args.mappings, 'w') as f:
         # Tight separators keep the mappings file as small as possible.
         json_mappings = json.dumps(reversed_mappings, separators=(',', ':'))
         f.write(json_mappings)
Ejemplo n.º 3
0
def translate(**kwargs):
    """Parse a program, encode it to Sketch and optionally run Sketch on it.

    Keyword Args:
        log_lvl: key into ``log_lvls`` (default ``'10'``).
        prg: program passed to ``parse`` (default ``None``).
        out_dir: output directory (default ``res_dir``).
        sketch: run Sketch after encoding (default ``True``).
        fs: forwarded to ``Encoder`` (default ``False``).
        custom_gen: add the custom codegen jar to the options (default ``False``).
        cntr: add ``-V3 --debug-cex`` to the options (default ``False``).
        skv: when non-zero, add ``-V<skv>`` to the options (default ``0``).
        lib: forwarded to ``parse`` (default ``True``).
        opts: initial list of Sketch options (default empty). The list is
            copied, so the caller's object is never modified.

    Returns:
        1 if Sketch was run and its result flag was falsy, otherwise 0.
    """
    ## logging configuration
    log_lvl = log_lvls.get(kwargs.get('log_lvl', '10'))
    logging.config.fileConfig(os.path.join(pwd, "logging.conf"))
    logging.getLogger().setLevel(log_lvl)

    prg = kwargs.get('prg', None)
    out_dir = kwargs.get('out_dir', res_dir)
    sk = kwargs.get('sketch', True)
    fs = kwargs.get('fs', False)
    cgen = kwargs.get('custom_gen', False)
    cntr = kwargs.get('cntr', False)
    skv = kwargs.get('skv', 0)
    lib = kwargs.get('lib', True)

    codegen_jar = os.path.join(root_dir, "codegen", "lib", "codegen.jar")

    logging.info('parsing {}'.format(prg))
    prg_ast = parse(prg,lib=lib)
    util.add_object(prg_ast)

    encoder = Encoder(prg_ast, out_dir, fs)
    logging.info('encoding to Sketch')
    encoder.to_sk()

    # Sketch options. Work on a copy: extending the caller's list in place
    # would leak our flags into it (and accumulate them across calls).
    # ``or []`` also tolerates an explicit opts=None.
    opts = list(kwargs.get('opts') or [])

    # print counter examples
    if cntr: opts.extend(['-V3', '--debug-cex'])
    if skv != 0: opts.extend(['-V{}'.format(skv)])

    # place to keep sketch's temporary files
    opts.extend(["--fe-tempdir", out_dir])
    opts.append("--fe-keep-tmp")

    # custom codegen
    if cgen: opts.extend(["--fe-custom-codegen", codegen_jar])

    # run Sketch
    output_path = os.path.join(out_dir, "output", "{}.txt".format(encoder.demo_name))
    if sk:
        # start from a clean output file
        if os.path.exists(output_path): os.remove(output_path)
        sketch.set_default_option(opts)

        logging.info('sk_dir: {}, output_path: {}'.format(encoder.sk_dir, output_path))
        _, r = sketch.run(encoder.sk_dir, output_path)

        # if sketch fails, halt the process here
        if not r: return 1
    elif not prg:
        # NOTE(review): this check only runs after prg has already been
        # parsed and encoded above -- confirm whether it should come first.
        jskparser.error("need to pass in some file")

    return 0
Ejemplo n.º 4
0
def test(polys, ber, message=None, print_flag=False):
  """Encode a message, send it, decode it and count the differences.

  Args:
    polys: passed to both ``Encoder`` and ``Trellis``.
    ber: passed to ``Encoder``.
    message: message to transmit; when None it is read from ``message.txt``
      in the current directory (stripped of surrounding whitespace).
    print_flag: forwarded to ``Encoder``; when true the original and decoded
      messages are also printed.

  Returns:
    ``hamming_distance(message, decoded)``.
  """
  # Identity test: `== None` can be hijacked by a custom __eq__.
  if message is None:
    # holds a 1000-bit binary string; the with-block closes the handle
    with open('message.txt', 'rb') as f:
      message = f.read().strip()

  e = Encoder(polys, message, ber, print_flag=print_flag)
  # Unused, but kept in case `encoded` is computed on access -- TODO confirm.
  encoded = e.encoded
  sent = e.send()

  t = Trellis(polys, sent)
  decoded = t.decode_message()

  if print_flag:
    # Single-argument print(...) produces identical output on Python 2 and 3.
    print('Message: %s' % message)
    print('Decoded: %s' % decoded)

  return hamming_distance(message, decoded)
Ejemplo n.º 5
0
def main():
    """Poll the rotary encoder forever and forward value changes over OSC.

    Every ``UPDATE_DELAY`` milliseconds the encoder value is read; when it
    differs from the last value sent, an OSC message carrying the tuple
    ``(0, 0xB0, MIDI_CC, val)`` is sent to ``OSC_TOPIC``. The encoder's
    current acceleration is decayed by ``enc.accel`` per cycle, never going
    below zero.
    """
    enc = Encoder(pin_clk=12, pin_dt=14, clicks=4, accel=5, max_val=127)
    osc = Client(OSC_SERVER, OSC_PORT)

    oldval = 0
    try:
        while True:
            val = enc.value
            if oldval != val:
                oldval = val

                osc.send(OSC_TOPIC, ('m', (0, 0xB0, MIDI_CC, val)))

            enc.cur_accel = max(0, enc.cur_accel - enc.accel)
            sleep_ms(UPDATE_DELAY)
    except Exception as exc:
        print(exc)
    finally:
        # Release the encoder on every exit path. The original only closed it
        # for Exception subclasses, so a KeyboardInterrupt left it open.
        enc.close()
    def encode_decode(self, k):

        print "\nTesting encoding and then decoding with k = %s" % k

        md5 = hashlib.md5()

        with FileChunker(k, SYMBOLSIZE, DEFAULT_FILE) as chunker:
            chunk = chunker.chunk()
            while chunk:
                padding = chunk.padding

                symbols = [(i, chunk[i]) for i in xrange(k)]
                encoder = Encoder(k, symbols)
                symbols = []

                # Start at k/2 and produce 1.25k more symbols to get a mix
                # of parity and source symbols
                for i in xrange(k * 2):
                    symbols.append(encoder.next())

                encoder = None
                decoder = Decoder(k)
                for tup in symbols:
                    decoder.append(tup)

                decoder.decode()
                decoded = bytearray()
                for i in xrange(k):
                    esi, s = decoder.next()
                    decoded += s.tostring()
                decoder = None

                if padding:
                    padding = 0 - padding
                    print "Removing padding", padding, "bytes"
                    decoded = decoded[:padding]

                md5.update(decoded)
                # Continue on to the next chunk
                chunk = chunker.chunk()

        print "Original digest:", self.original_digest
        print "Decoded digest:", md5.hexdigest()
        return self.original_digest == md5.hexdigest()
Ejemplo n.º 7
0
 def startStopRecording_(self, notification):
     """Toggle recording and refresh the status display.

     When not recording, a new Recorder is created and started. When
     recording, the recorder is joined and, if the module-level `encode`
     flag is set, an Encoder is started on the captured images.
     """
     if not self.recording:
         self.recorder = Recorder(self.image_dir, screenshot_interval)
         self.recorder.start()
     else:
         self.recorder.join()
         # Optionally build the timelapse once recording has finished.
         if encode:
             self.encoder = Encoder(
                 self.image_dir, self.encoder_output_basedir)
             self.encoder.start()
     self.recording = not self.recording
     self.setStatus()
Ejemplo n.º 8
0
    def __init__(self):
        """Create an empty automaton with no transitions or emissions."""
        from encoder import Encoder

        def _unset_row():
            # A row whose missing targets read as -inf (Automaton.m_inf).
            return defaultdict(lambda: Automaton.m_inf)

        # how edge values can be coded; None until a quantizer is attached
        self.quantizer = None

        # emissions for the states, and the reverse map letter -> states
        self.emissions = {}
        self.m_emittors = defaultdict(set)

        # the transitions: m[state1][state2] -> weight
        self.m = defaultdict(_unset_row)

        self.encoder = Encoder(3.1196)
Ejemplo n.º 9
0
	def __init__(self, posX, posY, posTheta):
		"""Set up the robot's geometry, pose, counters, encoders and sensors.

		posX, posY and posTheta are stored as the initial pose; every other
		numeric field starts from a fixed constant.
		"""
		# Body dimensions.
		self.w, self.h = 15, 10

		# Actual pose and heading.
		self.heading = 0
		self.posX, self.posY, self.posTheta = posX, posY, posTheta

		self.targetTheta = 0

		# Pose estimate kept separately from the actual pose.
		self.logicalX = self.logicalY = self.logicalTheta = 0

		# NOTE(review): presumably wheel radius, ticks per revolution and
		# wheel distance -- confirm against the code that uses them.
		self.Rw, self.Tr, self.D = 9, 240, 9

		self.dT1 = self.dT2 = 0
		self.T1 = self.T2 = 0

		self.counter = 0

		self.leftEncoder = Encoder()
		self.rightEncoder = Encoder()

		# Sensors receive this robot plus an offset and a direction pair.
		side_offset = self.h / 2 + 1
		front_offset = self.w / 2 + 1
		self.leftRangeSensor = RangeSensor(self, side_offset, 0, -1)
		self.frontRangeSensor = RangeSensor(self, front_offset, 1, 0)
		self.rightRangeSensor = RangeSensor(self, side_offset, 0, 1)

		self.statsWidget = RobotStatsWidget(self)
Ejemplo n.º 10
0
    def __init__(   self,
                    device      = '/dev/ttyUSB0',
                    set_id      = True,
                    device_id   = "01",
                    debug           = True,
                 ):
        """Open the serial port and optionally assign the device id.

        The port is opened at 9600 baud with 8 data bits, no parity, one
        stop bit and software (XON/XOFF) flow control. When `set_id` is
        true, `device_id` is sent to the device straight away.
        """
        self.debug = debug

        port_settings = dict(
            port=device,
            baudrate=9600,
            parity=serial.PARITY_NONE,
            stopbits=serial.STOPBITS_ONE,
            bytesize=serial.EIGHTBITS,
            xonxoff=True,
        )
        self.ser = serial.Serial(**port_settings)
        self.encoder = Encoder(1)

        if not set_id:
            return
        self._set_device_id(device_id)
Ejemplo n.º 11
0
def main():
    """Train a VAE on 21x21 contact matrices and log progress.

    Reads its configuration from the module-level ``args`` (``use_cuda``,
    ``half_precision``, ``batch_size``, ``log_interval``, ``save_path``).
    Runs 100 epochs of SGD; every iteration logs the loss and a grid of
    reconstructions to the summary writer, and every 10th epoch saves the
    model state dict and one reconstructed matrix as ``.npy``.
    """
    use_cuda = args.use_cuda
    half_precision = args.half_precision
    print("Cuda set to {} | Cuda availability: {}".format(
        use_cuda, torch.cuda.is_available()))

    experiment = "vae_latent3"
    logger = SummaryWriter(log_dir='./logs', comment=experiment)

    train_data = UnlabeledContact(
        data='/home/ygx/data/fspeptide/fs_peptide.npy')
    print('Number of samples: {}'.format(len(train_data)))
    trainloader = DataLoader(train_data, batch_size=args.batch_size)

    # Contact matrices are 21x21
    input_size = 441

    encoder = Encoder(input_size=input_size, latent_size=3)
    decoder = Decoder(latent_size=3, output_size=input_size)
    vae = VAE(encoder, decoder)

    if use_cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
        vae = vae.cuda()
        if half_precision:
            encoder = encoder.half()
            decoder = decoder.half()
            vae = vae.half()

    optimizer = optim.SGD(vae.parameters(), lr=0.001)

    losses = AverageMeter()
    for epoch in range(100):
        for batch_idx, data in enumerate(trainloader):
            inputs = data['cont_matrix']
            # NOTE(review): resize_ to the full batch size assumes every
            # batch (including the last one) holds args.batch_size samples
            # -- confirm against the dataset length.
            inputs = inputs.resize_(args.batch_size, 1, 21, 21)
            inputs = inputs.float()
            if use_cuda:
                inputs = inputs.cuda()
                if half_precision:
                    inputs = inputs.half()
            inputs = Variable(inputs)

            # Compute output
            optimizer.zero_grad()
            recon_batch, mu, logvar = vae(inputs)

            # Measure the loss
            loss = entropy_kl_loss(recon_batch, inputs, mu, logvar,
                                   args.batch_size, input_size)
            losses.update(loss.data[0], inputs.size(0))

            # Compute the gradient
            loss.backward()
            optimizer.step()

            # log loss values every iteration
            logger.add_scalar('data/(train)loss_val', losses.val,
                              batch_idx + 1)
            logger.add_scalar('data/(train)loss_avg', losses.avg,
                              batch_idx + 1)

            # log the outputs of the autoencoder
            logger.add_image('model/(train)output',
                             make_grid(recon_batch.data), batch_idx + 1)

            if batch_idx % args.log_interval == 0:
                # `data` is a dict, so len(data) is its number of keys; the
                # number of samples seen so far needs the batch dimension.
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * inputs.size(0),
                    len(trainloader.dataset),
                    100. * batch_idx / len(trainloader), loss.data[0]))

        if epoch % 10 == 0:
            torch.save(vae.state_dict(), args.save_path + 'epoch' + str(epoch))

            # vae(...) returns (reconstruction, mu, logvar); unpack first and
            # only then pull the first sample out of the reconstruction. The
            # original applied .data to the tuple itself, which fails.
            recon, _, _ = vae(inputs)
            reconstructed_array = recon.data[0].cpu().float().numpy().reshape(
                21, 21)
            recon_filename = 'reconstructed_epoch' + str(epoch)
            np.save('./reconstruct_saves/kl_bce_latent3/' + recon_filename,
                    reconstructed_array)
Ejemplo n.º 12
0
    r_val = [train_set, valid_set, test_set]
    return r_val

if __name__ == '__main__':
    batch_size = 10
    n_input = 3
    n_hidden = 50
    learning_rate = 0.01
    rng = numpy.random.RandomState(12321)
    word_dict = Dict(n_input, rng)
    datasets = load_data(sys.argv[1], word_dict)
    train_set, valid_set, test_set = datasets
    n_train_batches = [train_set[b][0].get_value(borrow=True).shape[-1] // batch_size for b in train_set.keys()]
    n_valid_batches = [valid_set[b][0].get_value(borrow=True).shape[-1] // batch_size for b in valid_set.keys()]
    n_test_batches = [test_set[b][0].get_value(borrow=True).shape[-1] // batch_size for b in test_set.keys()]
    encoder = Encoder(rng, n_hidden=n_hidden, n_input=n_input)

    # Holds indices for the batch
    index = T.lscalar()
    # Emb_dim x sequence_len X batch_size
    x = T.tensor3('x')
    # Batch_size x 1
    y = T.ivector('y')
    # sequence_len x batch_size
    m = T.matrix('m')

    get_hidden_states = theano.function(
        inputs=[index],
        outputs=encoder.compute_hidden_states_no_output(x),
        givens=[
            (x, test_set[10][0][index * batch_size: (index + 1) * batch_size]),
Ejemplo n.º 13
0
class Transmitter:
    """
        Sends content to the led ticker device.
        - transmission is blocking
    """
    def __init__(   self,
                    device      = '/dev/ttyUSB0',
                    set_id      = True,
                    device_id   = "01",
                    debug           = True,
                 ):
        """ opens the serial port (9600 baud, 8 data bits, no parity, one
            stop bit, XON/XOFF) and, when set_id is true, sends device_id
            to the device
        """

        self.debug = debug
        self.ser = serial.Serial(
            port            = device,
            baudrate        = 9600,
            parity          = serial.PARITY_NONE,
            stopbits        = serial.STOPBITS_ONE,
            bytesize        = serial.EIGHTBITS,
            xonxoff         = True,
        )
        self.encoder = Encoder(1)
        if set_id:
            self._set_device_id(device_id)

    def add_message(self, message, page):
        """ adds a message to the specified page """
        self._send_receive(commands.get_message_cmd(message, page = page))

    def set_schedule(self, pages):
        """ sets the schedule (order) of the pages """
        self._send_receive(commands.get_schedule_cmd(pages))

    def clear_screen(self):
        """ blanks the display: writes a single space to page 0 and
            schedules only page 0
        """
        self.add_message(" ", 0)
        self.set_schedule([0])

    def delete_pages(self):
        """ delete all pages """
        # NOTE(review): unlike the other commands this one does not wait
        # for the device's response -- confirm that is intended.
        self._send('<D*>')

    def end(self):
        """ terminates the connection to the led ticker """
        self.ser.close()

    def _send_receive(self, command):
        """ sends a command and returns the device's ACK/NACK response """
        # drop stale input so the response read belongs to this command
        self.ser.flushInput()
        self._send(command)
        return self._receive_response()

    def _send(self, command):
        """ sends a command to the device """
        # Encode once so that the bytes logged are exactly the bytes written.
        data = self.encoder.encode(command)
        self._debug_log(data)

        self.ser.write(data)

    def _receive_response(self):
        """ receives a ACK/NACK response

            ACK/NACK is sent as ascii; since they are not the same length,
            either 4 or 3 characters are read to prevent blocking
        """
        response = self.ser.read(1)
        if response == 'N':
            chars_to_read = 3
        else:
            chars_to_read = 2
        response += self.ser.read(chars_to_read)

        self._debug_log('response: ' + response)
        return response

    def _debug_log(self, message):
        """ prints the message when debugging is enabled """
        if self.debug:
            # single-argument print(...) is identical on Python 2 and 3
            print(message)

    def _set_device_id(self, device_id):
        """ sets the device id and logs the two-character reply """
        # NOTE(review): device_id is appended after a command that already
        # contains a literal "01" -- confirm against the device protocol.
        self.ser.flushInput()
        self.ser.write('<ID><01><E>' + device_id)
        self._debug_log('response: ' + self.ser.read(2))
Ejemplo n.º 14
0
class Automaton(object):
    """ Classic Moore-automaton class with
    @m: transitions per states (m[s1][s2] is the log probability of s1 -> s2)
    @emissions: emission per states
    @m_emittors: states per emitted letter

    "^" and "$" are the special start and end states; states whose name
    starts with "EPSILON_" do not emit. All dict iteration in this class
    is written so that it runs on both Python 2 and Python 3.
    """

    eps = 1e-7
    m_inf = float("-inf")

    def __init__(self):
        """Create an empty automaton with no transitions or emissions."""
        from encoder import Encoder

        self.encoder = Encoder(3.1196)
        # the transitions; missing edges read as -inf
        self.m = defaultdict(lambda: defaultdict(lambda: Automaton.m_inf))

        # emissions for the states
        self.emissions = {}
        self.m_emittors = defaultdict(set)

        # how edge values can be coded
        self.quantizer = None

    @staticmethod
    def read_transitions(filename):
        """Read "state1 state2 logprob" lines from @filename.

        Returns a dict of (state1, dict of (state2, logprob)).
        Raises ValueError when a weight is not strictly negative.
        """
        tr = {}
        # the with-block closes the file even when a line is rejected
        with open(filename) as f:
            for l in f:
                (state1, state2, probstr) = l.strip().split()
                if state1 not in tr:
                    tr[state1] = {}
                prob = float(probstr)
                # NOTE(review): this also rejects 0.0 (log of 1.0), whereas
                # create_from_dump accepts it -- confirm which is intended.
                if not (prob < 0.0):
                    raise ValueError("invalid probabilities in {0}, ".format(filename) + "only logprobs are accepted.")

                tr[state1][state2] = prob
        return tr

    @staticmethod
    def create_from_dump(f):
        """ Reads automaton dump from @f

        Lines with four fields are transitions ("s1 -> s2: weight"), lines
        with two fields are emissions ("state: emission").
        Raises ValueError for positive weights and Exception when the
        transitions of a state do not sum to 1.
        """
        automaton = Automaton()
        # create states and emissions
        for line in f:
            l = line.strip().split()
            if len(l) == 4:
                s1, _, s2, weight = l
                s2 = s2.strip(":")
                weight = float(weight)

                # check this with 1e-10 instead of 0.0 because of floating
                # point precision error
                if weight > 1e-10:
                    raise ValueError("Only logprogs are accepted in dumps")

                automaton.m[s1][s2] = weight
            elif len(l) == 2:
                state = l[0].rstrip(":")
                # NOTE(security): eval() executes whatever the dump contains,
                # so only load dumps from a trusted source. dump() writes
                # repr() of the emission, which ast.literal_eval could parse.
                emission = eval(l[1])
                automaton.emissions[state] = emission
                automaton.m_emittors[emission].add(state)

        for state in list(automaton.m):
            automaton.check_state_sum(state)

        automaton.finalize()
        return automaton

    @staticmethod
    def _create_automaton_from_alphabet(alphabet):
        """ Creates states of the automaton given by @alphabet
        @alphabet is a dict from letters to the number of states that emits
        that letter

        Raises Exception when no emitting state was created.
        """
        automaton = Automaton()

        # create states and emissions
        is_degenerate = True
        for letter in alphabet:
            for index in range(alphabet[letter]):
                state = "".join(letter) + "_" + str(index)
                automaton.emissions[state] = letter
                automaton.m_emittors[letter].add(state)
                if is_degenerate and not Automaton.is_epsilon_state(state):
                    # found at least one emitting state
                    is_degenerate = False

        if is_degenerate:
            raise Exception("Automaton has no emittors")

        return automaton

    @staticmethod
    def create_uniform_automaton(alphabet, initial_transitions=None):
        """Creates an automaton with alphabet and uniform transition probabilities.
        If initial_transitions is given (dict of (state, dict of (state, probability),
        returned by read_transitions) the remaining probability mass will be
        divided up among the uninitialized transitions in an uniform manner.

        Raises Exception for unknown state names in initial_transitions or
        when the initial probabilities of a state exceed 1.
        """

        automaton = Automaton._create_automaton_from_alphabet(alphabet)

        # list() so that the start/end states can be appended on Python 3 too
        states = list(automaton.emissions.keys())
        states.append("^")
        states.append("$")
        # set copy for O(1) membership tests; `states` keeps iteration order
        known_states = set(states)

        if initial_transitions:
            for s in initial_transitions:
                if s not in known_states:
                    raise Exception("invalid state name in initial " + "transitions given by option -I")
                for s2 in initial_transitions[s]:
                    if s2 not in known_states:
                        raise Exception("invalid state name in initial " + "transitions given by option -I")

        # calculate uniform transition distributions
        for s1 in states:
            if s1 == "$":
                continue

            init_total = 0.0
            states_initialized = set()
            if initial_transitions and s1 in initial_transitions:
                for s2 in initial_transitions[s1]:
                    if s2 not in known_states:
                        raise Exception("invalid state name in init: %s" % s2)

                    prob = initial_transitions[s1][s2]
                    automaton.m[s1][s2] = prob
                    init_total += math.exp(prob)
                    states_initialized.add(s2)
                    # TODO refactor this
                    if init_total > 1.0000001:
                        sys.stderr.write("state: {0}, init total: {1}\n".format(s1, init_total))
                        raise Exception("Too much probability for init_total")

            # divide up remaining mass into equal parts
            valid_next_states = set([s2 for s2 in states if Automaton.is_valid_transition(s1, s2)])

            if valid_next_states == states_initialized:
                continue

            u = (1.0 - init_total) / (len(valid_next_states) - len(states_initialized))
            for s2 in valid_next_states - states_initialized:
                try:
                    automaton.m[s1][s2] = math.log(u)
                except ValueError:
                    # no mass left (u <= 0): the edge gets zero probability
                    automaton.m[s1][s2] = Automaton.m_inf

        automaton.finalize()
        return automaton

    @staticmethod
    def create_from_corpus(corpus):
        """ Creates an automaton from a corpus, where @corpus is a dict from
        items (str or tuple) to counts"""
        automaton = Automaton()
        alphabet = set()
        total = float(sum(corpus.values()))
        for item, cnt in corpus.items():
            item = ("^",) + item + ("$",)
            for i in range(len(item) - 1):
                alphabet.add(item[i])
                alphabet.add(item[i + 1])
                # explicit membership test: a defaultdict lookup of a missing
                # edge would yield -inf, not 0
                if item[i + 1] in automaton.m[item[i]]:
                    automaton.m[item[i]][item[i + 1]] += cnt / total
                else:
                    automaton.m[item[i]][item[i + 1]] = cnt / total

        # changing to log probs and normalize
        for state1, outs in list(automaton.m.items()):
            for state2 in list(outs):
                outs[state2] = math.log(outs[state2])
            automaton.normalize_state(state1)

        for l in alphabet:
            automaton.emissions[l] = l
            automaton.m_emittors[l].add(l)

        automaton.finalize()
        return automaton

    @staticmethod
    def is_epsilon_state(state):
        """True when the state name starts with "EPSILON_"."""
        return state.startswith("EPSILON_")

    @staticmethod
    def is_valid_transition(state1, state2):
        """True when an edge state1 -> state2 is allowed."""
        # subsequent non emitting states are not allowed
        # the only exception is '^' -> '$'
        if (state1, state2) == ("^", "$"):
            return True
        return (
            not (Automaton.nonemitting(state1) and Automaton.nonemitting(state2))
            and not state2 == "^"
            and not state1 == "$"
        )

    @staticmethod
    def nonemitting(state):
        """True for "^", "$" and epsilon states."""
        return state == "^" or state == "$" or Automaton.is_epsilon_state(state)

    def finalize(self):
        """Replace the defaultdicts in m and m_emittors with plain dicts."""
        for state1, transitions in list(self.m.items()):
            self.m[state1] = dict(transitions)
        self.m = dict(self.m)
        self.m_emittors = dict(self.m_emittors)

    def emittors(self, letter):
        """Return the set of states that emit @letter."""
        return self.m_emittors[letter]

    def update_probability_of_string_in_state(self, string, state, memo):
        """The probability of the event that the automaton emits
        'string' and finishes the quest at 'state'.
        'state' did not emit yet:
        It will emit the next symbol following 'string'.

        The result is stored in memo[string] at the index of 'state';
        nothing is returned. Requires self.state_indices (set by init_memo).
        """
        total = Automaton.m_inf
        # compute real emissions
        for previousState in self.emissions:
            previousState_i = self.state_indices[previousState]

            # stop if no transitions to state
            if state not in self.m[previousState]:
                continue

            state_emit = self.emissions[previousState]
            state_emit_l = len(state_emit)

            if state_emit == string[-state_emit_l:]:
                head = string[:-state_emit_l]

                soFar = Automaton.m_inf
                if head in memo and memo[head][previousState_i] is not None:
                    soFar = memo[head][previousState_i]
                    soFar += self.m[previousState][state]
                total = max(soFar, total)

        # check the case of epsilon emission
        for epsilonState in list(self.m.keys()):
            epsilonState_i = self.state_indices[epsilonState]

            if epsilonState in self.emissions:
                continue

            # if the automaton is not complete, avoid KeyError:
            if state not in self.m[epsilonState]:
                continue

            if string not in memo or memo[string][epsilonState_i] is None:
                continue

            soFar = memo[string][epsilonState_i]
            soFar += self.m[epsilonState][state]
            total = max(soFar, total)

        if string not in memo:
            memo[string] = [None] * len(self.state_indices)

        memo[string][self.state_indices[state]] = total

    def update_probability_of_string(self, string, memo):
        """Probability that the automaton emits this string

        Updates memo[string] for every state except "^".
        """
        states = set(self.m.keys())
        states.add("$")
        states.remove("^")

        # first compute the epsilon states probs because of the
        # memoization dependency
        for state in sorted(states, key=lambda x: not Automaton.is_epsilon_state(x)):
            self.update_probability_of_string_in_state(string, state, memo)

    def probability_of_strings(self, strings):
        """
        Expects a list of strings.
        Outputs a map from those strings to probabilities.
        """
        topsorted = closure_and_top_sort(strings)
        # remove empty string
        topsorted = topsorted[1:]

        memo = self.init_memo()
        output = {}

        for string in topsorted:
            self.update_probability_of_string(string, memo)
            output[string] = memo[string][self.state_indices["$"]]
        return output

    def init_memo(self):
        """Build the memo for the empty string and set self.state_indices.

        Returns a dict mapping () to a per-state list of log probabilities
        (None where nothing has been computed).
        """

        # to save memory if memo is huge, inner dicts in memo are actually
        # lists with state indices
        states = set(self.m.keys())
        states.add("$")
        self.state_indices = dict((s, i) for i, s in enumerate(states))

        memo = {(): [None] * len(states)}
        epsilon_reachables = set(["^"])
        while True:
            targets = set()
            for state in epsilon_reachables:
                state_i = self.state_indices[state]

                for target in self.m[state]:
                    target_i = self.state_indices[target]

                    if target in epsilon_reachables:
                        continue

                    if Automaton.is_epsilon_state(target):
                        targets.add(target)
                        # start is not memoized

                    so_far = Automaton.m_inf
                    if memo[()][target_i] is not None:
                        so_far = memo[()][target_i]

                    prob_this_way = self.m[state][target]
                    if state != "^":
                        prob_this_way += memo[()][state_i]

                    memo[()][target_i] = max(so_far, prob_this_way)
            epsilon_reachables |= targets
            if len(targets) == 0:
                break

        return memo

    @staticmethod
    def kullback(p1, p2):
        """One term of the Kullback-Leibler sum: p1 * log(p1 / p2)."""
        if p1 == 0.0:
            return 0.0
        return p1 * math.log(p1 / p2)

    @staticmethod
    def squarerr(p1, p2):
        """Squared difference of the two probabilities."""
        return (p1 - p2) ** 2

    @staticmethod
    def l1err(p1, p2):
        """Absolute difference of the two probabilities."""
        return abs(p1 - p2)

    def distance_from_corpus(self, corpus, distfp, reverse=False, distances=None):
        """Sum distfp over every corpus item with positive probability.

        @corpus maps items to probabilities, @distfp is called as
        distfp(corpus_p, modeled_p) (arguments swapped when @reverse is
        true). When a dict is passed as @distances, the per-item distances
        are stored in it.
        """
        # A `{}` default would be one dict shared by every call.
        if distances is None:
            distances = {}
        distance = 0.0
        probs = self.probability_of_strings(list(corpus.keys()))
        for item, corpus_p in corpus.items():
            if corpus_p > 0.0:
                modeled_p = math.exp(probs[item])
                # avoid a zero probability (e.g. division by zero in kullback)
                if modeled_p == 0.0:
                    modeled_p = 1e-50

                dist = distfp(corpus_p, modeled_p) if not reverse else distfp(modeled_p, corpus_p)
                distance += dist
                distances[item] = dist
        return distance

    def round_and_normalize_state(self, state):
        """Quantize (when a quantizer is set) and renormalize one state."""
        if self.quantizer:
            self.round_transitions(self.m[state])
        self.normalize_state(state)

    def round_transitions(self, edges):
        """Replace every weight in @edges with the quantizer's representer."""
        for state, weight in list(edges.items()):
            edges[state] = self.quantizer.representer(weight)

    def normalize_state(self, state):
        """Shift the outgoing log probabilities of @state so they sum to 1."""
        edges = self.m[state]
        total_log = math.log(sum(math.exp(v) for v in edges.values()))
        for s2 in list(edges):
            edges[s2] -= total_log

    def round_and_normalize(self):
        """Apply round_and_normalize_state to every state."""
        for state in list(self.m):
            self.round_and_normalize_state(state)

    def smooth(self):
        """Smooth zero transition probabilities"""
        eps = math.log(Automaton.eps)
        for state, edges in list(self.m.items()):
            for other_state in list(edges):
                old_val = edges.get(other_state, Automaton.m_inf)
                if old_val < eps:
                    edges[other_state] = eps

            # normalize the transitions
            self.normalize_state(state)

    def boost_edge(self, edge, factor):
        """Adds @factor logprob to @edge"""
        s1, s2 = edge
        self.m[s1][s2] += factor
        self.round_and_normalize_state(s1)
        self.check_state_sum(s1)

    def check_state_sum(self, state):
        """Raise Exception unless the transitions of @state sum to 1 (+-1e-3)."""
        edges = self.m[state]
        s_sum = sum([math.exp(log_prob) for log_prob in edges.values()])
        if abs(1.0 - s_sum) < 1e-3:
            return
        else:
            raise Exception("transitions from state {0} ".format(state) + "don't sum to 1, but {0}".format(s_sum))

    def dump(self, f):
        """Write the automaton to @f in the format read by create_from_dump.

        When a quantizer is set, a leading line reports the bit counts
        returned by self.encoder.automaton_bits.
        """
        if self.quantizer is not None:
            emit_bits, trans_bits = self.encoder.automaton_bits(self)
            total_bits = emit_bits + trans_bits
            # Argument order follows the labels in the message: the original
            # passed emit_bits for "transition bits" and vice versa.
            f.write(
                "total bits: {0} ({1} transition bits, {2} emission bits)\n".format(total_bits, trans_bits, emit_bits)
            )
        states = sorted(self.m.keys())
        for s1 in states:
            for s2 in states + ["$"]:
                if s2 in self.m[s1]:
                    f.write("{0} -> {1}: {2}\n".format(s1, s2, self.m[s1][s2]))
        for s1, em in self.emissions.items():
            # spaces are removed so the emission stays a single field
            f.write("{0}: {1}\n".format(s1, repr(em).replace(" ", "")))

    def split_state(self, state, new_state, ratio):
        """Route @state's traffic through epsilon hubs shared with @new_state.

        Incoming edges of states starting with @state are redirected to an
        "in" hub that splits between state_0 (1 - ratio) and new_state_0
        (ratio); both then lead to an "out" hub that takes over the
        original outgoing edges of state_0.
        """
        hub_in = "EPSILON_{0}_{1}_in".format(state, new_state)
        hub_out = "EPSILON_{0}_{1}_out".format(state, new_state)
        self.m[hub_in] = {state + "_0": math.log(1 - ratio), new_state + "_0": math.log(ratio)}
        self.m[hub_out] = {}
        self.emissions[new_state + "_0"] = (new_state,)
        self.m_emittors[(new_state,)] = set([new_state + "_0"])
        # snapshots: the inner dicts gain the hub_in edge while we iterate
        for s1, trs in list(self.m.items()):
            if s1 in (hub_in, hub_out):
                continue
            for s2, p in list(trs.items()):
                if s2.startswith(state):
                    self.m[s1][hub_in] = p
                    self.m[s1][s2] = float("-inf")
        for s2, p in list(self.m[state + "_0"].items()):
            self.m[hub_out][s2] = p
        self.m[state + "_0"] = {hub_out: 0.0}
        self.m[new_state + "_0"] = {hub_out: 0.0}

    def language(self):
        """Enumerate the strings the automaton generates, with their memo rows.

        Words are extended by one emission per round until the generated
        mass stops growing (change < 1e-4) or exceeds 0.99. Returns the memo
        restricted to words that have a probability at "$".
        """
        generated_mass = 0.0

        emits = set(self.emissions.values())
        memo = self.init_memo()
        prev_mass = -1.0
        while abs(generated_mass - prev_mass) >= 1e-4 and 1.0 - generated_mass > 0.01:

            prev_mass = generated_mass
            # snapshot of the keys: updating adds the new words to memo
            for word in list(memo.keys()):
                for emit in emits:
                    new_word = word + emit
                    self.update_probability_of_string(new_word, memo)

            # filter small probs
            memo = dict(
                (k, [(None if (lp is None or lp < -100) else lp) for lp in probs])
                for k, probs in memo.items()
            )

            # filter small prob words
            memo = dict(
                (k, probs) for k, probs in memo.items()
                if sum(lp for lp in probs if lp is not None) > -200
            )

            # compute generated mass
            generated_mass = sum(
                [
                    math.exp(prob_list[self.state_indices["$"]])
                    for s, prob_list in memo.items()
                    if (s != () and prob_list[self.state_indices["$"]] is not None)
                ]
            )

        # snapshot of the keys: entries are deleted while looping
        for k in list(memo.keys()):
            if memo[k][self.state_indices["$"]] is None:
                del memo[k]

        return memo
Ejemplo n.º 15
0
class Timelapse(NSObject):
    """ Creates a timelapse video

    Status-bar application delegate: it owns the status item and its menu,
    starts/stops a ``Recorder`` that writes into ``self.image_dir`` and, when
    the module-level ``encode`` flag is set, starts an ``Encoder`` once a
    recording is stopped.
    """

    def applicationDidFinishLaunching_(self, notification):
        """ Sets up output directories, the status item, its menu and icons """
        self.check_dependencies()

        # Initialize recording
        # NOTE(review): no Recorder is created here, so if start_recording is
        # true the first startStopRecording_ call reads self.recorder before
        # it exists -- confirm start_recording is always false or start a
        # recorder at launch.
        self.recording = start_recording

        # Set correct output paths
        self.recorder_output_basedir = os.path.join(dir_base, dir_pictures,
                                                    dir_app)
        self.encoder_output_basedir = os.path.join(dir_base, dir_movies)

        self.image_dir = self.create_dir(self.recorder_output_basedir)

        # Create a reference to the statusbar (menubar)
        self.statusbar = NSStatusBar.systemStatusBar()

        # Create item in statusbar
        self.statusitem = self.statusbar.statusItemWithLength_(
            NSVariableStatusItemLength)
        self.statusitem.setHighlightMode_(1)  # Highlight upon clicking

        # Create a simple menu and bind it to the status item
        self.menu = self.createMenu()
        self.statusitem.setMenu_(self.menu)

        # Load icons and show them in the statusbar
        self.loadIcons()
        self.setStatus()

    def loadIcons(self):
        """ Loads the recording and idle status-bar images """
        self.icon_recording = NSImage.alloc().initWithContentsOfFile_(
            os.path.join("timelapse", dir_resources, image_recording))
        self.icon_idle = NSImage.alloc().initWithContentsOfFile_(
            os.path.join("timelapse", dir_resources, image_idle))

    def setStatus(self):
        """ Sets the image and menu text according to recording status """
        if self.recording:
            self.statusitem.setImage_(self.icon_recording)
            self.recordButton.setTitle_(text_recorder_running)
            self.statusitem.setToolTip_(tooltip_running)
        else:
            self.statusitem.setImage_(self.icon_idle)
            self.recordButton.setTitle_(text_recorder_idle)
            self.statusitem.setToolTip_(tooltip_idle)

    def createMenu(self):
        """ Status bar menu """
        menu = NSMenu.alloc().init()
        # Bind record event
        self.recordButton = NSMenuItem.alloc(
        ).initWithTitle_action_keyEquivalent_(text_recorder_idle,
                                              'startStopRecording:', '')
        menu.addItem_(self.recordButton)
        # Quit event
        menuitem = NSMenuItem.alloc().initWithTitle_action_keyEquivalent_(
            'Quit', 'terminate:', '')
        menu.addItem_(menuitem)
        return menu

    def startStopRecording_(self, notification):
        """ Toggles recording; optionally encodes after a recording ends """
        if self.recording:
            self.recorder.join()
            # Create timelapse after recording?
            if encode:
                self.encoder = Encoder(self.image_dir,
                                       self.encoder_output_basedir)
                self.encoder.start()
        else:
            self.recorder = Recorder(self.image_dir, screenshot_interval)
            self.recorder.start()
        self.recording = not self.recording
        self.setStatus()

    @objc.python_method
    def create_dir(self, base_dir):
        """ Creates a specified directory and the path to it if necessary

        Returns the directory that should receive the output.  Prints the
        error and exits if the directory cannot be created.
        """
        if create_session_subdir:
            # Create a new subdirectory. get_sub_dir() already returns a path
            # rooted at base_dir; joining it onto base_dir again would
            # duplicate a relative base_dir ("a/a/sub" instead of "a/sub").
            output_dir = self.get_sub_dir(base_dir)
        else:
            # Don't create a subdirectory. Use base directory for output
            output_dir = base_dir
        print(output_dir)
        # Create path if it doesn't exist. os.makedirs raises OSError for an
        # existing directory, which must not be treated as a failure.
        if not os.path.isdir(output_dir):
            try:
                os.makedirs(output_dir)
            except OSError as e:
                print("Error while creating directory:", e)
                exit()
        return output_dir

    @objc.python_method
    def get_sub_dir(self, base_dir):
        """ Returns the next nonexistent subdirectory path below base_dir """
        subdir_base = os.path.join(base_dir, subdir_suffix)
        # Check if we can use subdir without any session id
        subdir = subdir_base
        # Use a session id only if subdir already exists
        session_number = 0
        while os.path.exists(subdir):
            # We can't use subdir. Create directory with session id
            session_number += 1
            subdir = subdir_base + "-" + str(session_number)
        return subdir

    def check_dependencies(self):
        """ Checks that an ``ffmpeg`` executable can be launched """
        try:
            subprocess.check_call(['ffmpeg'])
        except subprocess.CalledProcessError:
            print("ffmpeg command was found")
            pass  # ffmpeg is found, but returns non-zero exit as expected
            # This is a quick and dirty check; it leaves some spurious output
            # for the user to puzzle over.
        except OSError:
            print(not_found_msg)
Ejemplo n.º 16
0
def main():
    """Run the encoder-knob volume control loop for the selected Chromecast.

    Connects to the first device from the module-level ``chromecast``
    iterator, mirrors the device volume on the encoder and the NeoPixel
    ring, and then loops forever:

    * encoder moves are shown on the ring immediately and sent to the
      device once the knob has been still for 200 ms;
    * after 10 s without activity the device is disconnected, the ring is
      switched off and the board goes to deep sleep;
    * holding the button for more than 2 s stops playback, a shorter press
      advances to the next device.
    """
    global device

    settle_ms = 200          # knob must rest this long before volume is sent
    idle_sleep_ms = 10000    # 10 sec
    long_press_ms = 2000

    device = next(chromecast)
    enc = Encoder(12, 13, clicks=2, reverse=True)
    np = volume.NeoPixelRing(4, device, machine.Pin(15), 16)
    button = machine.Pin(5, machine.Pin.IN)
    cast = connect2device(np)
    current_vol = cast.get_volume
    print('Connected to:', cast_name[device], device, 'current vol:', current_vol)
    enc.set_val(current_vol)
    last_enc_val = current_vol
    last_change_tick = time.ticks_ms()
    np.change_device(device, current_vol)

    while True:
        val = enc.value
        if last_enc_val != val:
            print(val)
            np.set_vol(val)
            last_enc_val = val
            last_change_tick = time.ticks_ms()

        #CHANGING VOLUME
        if (time.ticks_diff(time.ticks_ms(), last_change_tick) > settle_ms) and (last_enc_val != current_vol):
            cast.set_volume(val)
            current_vol = cast.get_volume
            print('current volume:', current_vol)

        #SLEEP AFTER DELAY
        if time.ticks_diff(time.ticks_ms(), last_change_tick) > idle_sleep_ms:
            cast.disconnect()
            np.turn_off()
            print("SLEEP")
            esp.deepsleep()

        #CHANGING CHROMECAST WITH ENCODER BUTTON
        if button.value():
            print('BUTTON PRESSED')
            b_start = time.ticks_ms()
            while button.value():
                if time.ticks_diff(time.ticks_ms(), b_start) > long_press_ms:
                    print('STOPPING PLAYBACK')
                    np.stop()
                    cast.stop_playback()
                    time.sleep_ms(1500)
                    np.set_vol(current_vol)
                    last_change_tick = time.ticks_ms()
                    break
            if time.ticks_diff(time.ticks_ms(), b_start) < long_press_ms:
                prev_device = device
                device = next(chromecast)
                # Compare by value, and only drop the current connection when
                # a different device was actually selected; otherwise the
                # loop would keep using a disconnected cast.
                if device != prev_device:
                    cast.disconnect()
                    cast = connect2device(np)
                    current_vol = cast.get_volume
                    enc.set_val(current_vol)
                    np.change_device(device, current_vol)
                    print('switched to:', cast_name[device], device, 'current vol:', current_vol)
                last_change_tick = time.ticks_ms()

        time.sleep_ms(100)
Ejemplo n.º 17
0
 def __init__(self, model_pre_path):
     """Create the encoder and the decoder, both from ``model_pre_path``."""
     # The same argument is handed to both constructors; what it denotes
     # (presumably a path prefix of saved model files) is defined by
     # Encoder/Decoder -- TODO confirm.
     self.encoder = Encoder(model_pre_path)
     self.decoder = Decoder(model_pre_path)
Ejemplo n.º 18
0
class Timelapse(NSObject):
    """ Creates a timelapse video

    Status-bar application delegate: it owns the status item and its menu,
    starts/stops a ``Recorder`` that writes into ``self.image_dir`` and, when
    the module-level ``encode`` flag is set, starts an ``Encoder`` once a
    recording is stopped.
    """

    def applicationDidFinishLaunching_(self, notification):
        """ Sets up output directories, the status item, its menu and icons """
        self.check_dependencies()

        # Initialize recording
        # NOTE(review): no Recorder is created here, so if start_recording is
        # true the first startStopRecording_ call reads self.recorder before
        # it exists -- confirm start_recording is always false or start a
        # recorder at launch.
        self.recording = start_recording

        # Set correct output paths
        self.recorder_output_basedir = os.path.join(
            dir_base, dir_pictures, dir_app)
        self.encoder_output_basedir = os.path.join(dir_base, dir_movies)

        self.image_dir = self.create_dir(self.recorder_output_basedir)

        # Create a reference to the statusbar (menubar)
        self.statusbar = NSStatusBar.systemStatusBar()

        # Create item in statusbar
        self.statusitem = self.statusbar.statusItemWithLength_(
            NSVariableStatusItemLength)
        self.statusitem.setHighlightMode_(1)  # Highlight upon clicking

        # Create a simple menu and bind it to the status item
        self.menu = self.createMenu()
        self.statusitem.setMenu_(self.menu)

        # Load icons and show them in the statusbar
        self.loadIcons()
        self.setStatus()

    def loadIcons(self):
        """ Loads the recording and idle status-bar images """
        self.icon_recording = NSImage.alloc().initWithContentsOfFile_(
            os.path.join("timelapse", dir_resources, image_recording))
        self.icon_idle = NSImage.alloc().initWithContentsOfFile_(
            os.path.join("timelapse", dir_resources, image_idle))

    def setStatus(self):
        """ Sets the image and menu text according to recording status """
        if self.recording:
            self.statusitem.setImage_(self.icon_recording)
            self.recordButton.setTitle_(text_recorder_running)
            self.statusitem.setToolTip_(tooltip_running)
        else:
            self.statusitem.setImage_(self.icon_idle)
            self.recordButton.setTitle_(text_recorder_idle)
            self.statusitem.setToolTip_(tooltip_idle)

    def createMenu(self):
        """ Status bar menu """
        menu = NSMenu.alloc().init()
        # Bind record event
        self.recordButton = NSMenuItem.alloc().initWithTitle_action_keyEquivalent_(
            text_recorder_idle, 'startStopRecording:', '')
        menu.addItem_(self.recordButton)
        # Quit event
        menuitem = NSMenuItem.alloc().initWithTitle_action_keyEquivalent_(
            'Quit', 'terminate:', '')
        menu.addItem_(menuitem)
        return menu

    def startStopRecording_(self, notification):
        """ Toggles recording; optionally encodes after a recording ends """
        if self.recording:
            self.recorder.join()
            # Create timelapse after recording?
            if encode:
                self.encoder = Encoder(
                    self.image_dir, self.encoder_output_basedir)
                self.encoder.start()
        else:
            self.recorder = Recorder(self.image_dir, screenshot_interval)
            self.recorder.start()
        self.recording = not self.recording
        self.setStatus()

    @objc.python_method
    def create_dir(self, base_dir):
        """ Creates a specified directory and the path to it if necessary

        Returns the directory that should receive the output.  Prints the
        error and exits if the directory cannot be created.
        """
        if create_session_subdir:
            # Create a new subdirectory. get_sub_dir() already returns a path
            # rooted at base_dir; joining it onto base_dir again would
            # duplicate a relative base_dir ("a/a/sub" instead of "a/sub").
            output_dir = self.get_sub_dir(base_dir)
        else:
            # Don't create a subdirectory. Use base directory for output
            output_dir = base_dir
        print(output_dir)
        # Create path if it doesn't exist. os.makedirs raises OSError for an
        # existing directory, which must not be treated as a failure.
        if not os.path.isdir(output_dir):
            try:
                os.makedirs(output_dir)
            except OSError as e:
                print("Error while creating directory:", e)
                exit()
        return output_dir

    @objc.python_method
    def get_sub_dir(self, base_dir):
        """ Returns the next nonexistent subdirectory path below base_dir """
        subdir_base = os.path.join(base_dir, subdir_suffix)
        # Check if we can use subdir without any session id
        subdir = subdir_base
        # Use a session id only if subdir already exists
        session_number = 0
        while os.path.exists(subdir):
            # We can't use subdir. Create directory with session id
            session_number += 1
            subdir = subdir_base + "-" + str(session_number)
        return subdir

    def check_dependencies(self):
        """ Checks that an ``ffmpeg`` executable can be launched """
        try:
            subprocess.check_call(['ffmpeg'])
        except subprocess.CalledProcessError:
            print("ffmpeg command was found")
            pass  # ffmpeg is found, but returns non-zero exit as expected
            # This is a quick and dirty check; it leaves some spurious output
            # for the user to puzzle over.
        except OSError:
            print(not_found_msg)
Ejemplo n.º 19
0
def train_decoder(train_set, opt, learning_r, encoder=None, epoch=500, batch_size=32, pre_trained_path='',
                  test_set=None,
                  name="Validation", tensorboard=False, img_tag=""):
    """Train an encoder/decoder pair to reconstruct its inputs (MSE loss).

    :param train_set: data handed to ``make_data_loader`` and to the
        per-epoch training evaluation
    :param opt: ``"ADAM"`` selects Adam; any other value selects SGD
    :param learning_r: learning rate for the optimizer
    :param encoder: encoder to train; a fresh ``Encoder()`` is created when
        ``None``.  It is replaced by the checkpoint's ``"model"`` entry when
        ``pre_trained_path`` is given.
    :param epoch: number of passes over the training data
    :param batch_size: batch size for the data loader
    :param pre_trained_path: optional checkpoint path loaded with ``torch.load``
    :param test_set: optional data evaluated after every epoch
    :param name: label passed to ``test_decoder`` for the ``test_set``
        evaluation; scalars are only written to tensorboard when it differs
        from ``"Validation"``
    :param tensorboard: forwarded to ``test_decoder`` and enables the scalar
        writers
    :param img_tag: forwarded to ``test_decoder``
    :return: ``(encoder, decoder, best_test_loss)``; ``best_test_loss`` keeps
        its initial value 999999999 when ``test_set`` is ``None``
    """
    data_loader = make_data_loader(data_to_loader=train_set, batch_size=batch_size)
    best_test_loss = 999999999

    if encoder is None:
        encoder = Encoder()
    decoder = Decoder()

    if pre_trained_path != '':
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        encoder = torch.load(pre_trained_path)["model"]

    if use_gpu:
        decoder = decoder.cuda()
        encoder = encoder.cuda()

    criterion = nn.MSELoss()
    params = list(decoder.parameters()) + list(encoder.parameters())

    if opt == "ADAM":
        optimizer = optim.Adam(params, lr=learning_r)
    else:
        optimizer = optim.SGD(params, lr=learning_r)

    # Strings are compared by value; identity ("is not") only worked by
    # accident of interning.
    log_scalars = tensorboard and name != "Validation"

    # A separate loop variable keeps the ``epoch`` parameter intact.
    for epoch_idx in range(epoch):  # loop over the dataset multiple times
        print("Epoch: ", epoch_idx)

        for inputs, _ in data_loader:
            # labels are not needed: the target is the input itself
            if use_gpu:
                inputs = inputs.cuda()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            hidden = encoder(inputs)
            outputs = decoder(hidden)

            loss = criterion(outputs, inputs)
            loss.backward()
            optimizer.step()

        train_loss = test_decoder(encoder, decoder, vad_set=train_set, name="Training", show_log=True, img_tag=img_tag,
                                  tensorboard=tensorboard, epoch=epoch_idx)

        if test_set is not None:
            test_loss = test_decoder(encoder, decoder, vad_set=test_set, name=name, show_log=True, img_tag=img_tag,
                                     tensorboard=tensorboard, epoch=epoch_idx)
            if log_scalars:
                writer_test.add_scalar('MSE_loss', test_loss, epoch_idx)

            if test_loss < best_test_loss:
                best_test_loss = test_loss

        if log_scalars:
            writer_train.add_scalar('MSE_loss', train_loss, epoch_idx)

    return encoder, decoder, best_test_loss
Ejemplo n.º 20
0
from machine import Pin, I2C, Timer
from board import *
from bno055 import BNO055 # IMU

from drv8833 import DRV8833 # your implementation
from motor import PIDMotor # your implementation
from encoder import Encoder # your implementation, don't forget clear_count
from balance import Balance

import gc # for garbage collection methods

# Setup motors
# Each wheel gets an Encoder, a DRV8833 driver object and a PIDMotor that
# combines the two.
########## Check Pin Numbers! ##########
# Change pin numbers here to match yours or rewire your robot
# NOTE(review): the first two Encoder arguments look like the two encoder
# pins and the third like a counter/unit id -- confirm against encoder.py.
leftEnc = Encoder(34, 39, 2)
# NOTE(review): presumably the two driver input pins -- confirm against drv8833.py.
leftM = DRV8833(19, 16)

rightEnc = Encoder(36, 4, 1)
rightM = DRV8833(17, 21)
########## Check Pin Numbers! ##########

# One PID-controlled motor per side, built from its driver and encoder.
pidL = PIDMotor(leftM, leftEnc)
pidR = PIDMotor(rightM, rightEnc)

# setup IMU
# I2C bus 0 on pins 23 (SDA) / 22 (SCL) at 13 kHz; the unusually low clock is
# presumably deliberate for the BNO055 -- TODO confirm before raising it.
i2c = I2C(0, sda=23, scl=22, freq=13000)
imu = BNO055(i2c)

# status LED
# LED is not defined in this file; presumably it comes from `from board import *`.
led = Pin(LED, mode=Pin.OUT)
Ejemplo n.º 21
0
    def __init__(self,
                 hyperparams,
                 is_training,
                 inputs,
                 input_lengths,
                 mel_targets=None,
                 linear_targets=None,
                 audio_lengths=None):
        """Build the encoder/decoder graph and, when training, loss and optimizer.

        :param hyperparams: object providing ``initial_learning_rate``,
            ``adam_beta1`` and ``adam_beta2`` (also passed on to Encoder/Decoder)
        :param is_training: when true, the loss, optimizer and summary ops
            are created as well
        :param inputs: passed to the Encoder
        :param input_lengths: passed to the Encoder
        :param mel_targets: passed to the Decoder; also the target of the
            mel L1 loss when training
        :param linear_targets: target of the linear L1 loss when training
        :param audio_lengths: summed into the ``total_train_length`` counter
            on every optimize step when training

        NOTE(review): the three optional targets default to None but are used
        unconditionally when ``is_training`` is true.
        """

        self.encoder = Encoder(hyperparams, is_training, inputs, input_lengths)

        self.decoder = Decoder(hyperparams, is_training,
                               self.encoder.encoder_outputs, mel_targets)

        if is_training:
            with tf.variable_scope('loss'):
                # Element-wise absolute errors (L1) for both outputs.
                mel_loss = tf.abs(mel_targets - self.decoder.mel_outputs)

                l1 = tf.abs(linear_targets - self.decoder.linear_outputs)

                # Total loss is the sum of the two mean absolute errors.
                self.linear_loss = tf.reduce_mean(l1)
                self.mel_loss = tf.reduce_mean(mel_loss)
                self.loss = self.linear_loss + self.mel_loss

            with tf.variable_scope('optimizer'):
                # Non-trainable step counter, incremented by apply_gradients.
                self.global_step = tf.get_variable(
                    "global_step",
                    shape=[],
                    trainable=False,
                    initializer=tf.zeros_initializer,
                    dtype=tf.int32)

                step = tf.cast(self.global_step + 1, dtype=tf.float32)

                # initial_learning_rate scaled by an exponential decay with
                # rate 0.95 per 3000 steps.
                self.learning_rate = hyperparams.initial_learning_rate * \
                            tf.train.exponential_decay(1., step, 3000, 0.95)

                optimizer = tf.train.AdamOptimizer(self.learning_rate,
                                                   hyperparams.adam_beta1,
                                                   hyperparams.adam_beta2)
                # Split (gradient, variable) pairs so the gradients can be
                # clipped jointly to a global norm of 1.0.
                self.gradients, variables = zip(
                    *optimizer.compute_gradients(self.loss))
                clipped_gradients, _ = tf.clip_by_global_norm(
                    self.gradients, 1.0)

                # Memo the total length of audio this model was trained on
                # NOTE(review): unlike global_step this variable is not
                # created with trainable=False -- confirm that is intended.
                self.total_length = tf.get_variable(
                    'total_train_length', [],
                    initializer=tf.zeros_initializer,
                    dtype=tf.float32)

                update_total_length = tf.assign_add(
                    self.total_length, tf.reduce_sum(audio_lengths))

                # Add dependency on UPDATE_OPS; otherwise batchnorm won't work correctly. See:
                # https://github.com/tensorflow/tensorflow/issues/1122
                # The length counter update is tied to the same step.
                with tf.control_dependencies(
                        tf.get_collection(tf.GraphKeys.UPDATE_OPS) +
                    [update_total_length]):
                    self.optimize = optimizer.apply_gradients(
                        zip(clipped_gradients, variables),
                        global_step=self.global_step)

                # Logging
                self.training_summary = tf.summary.merge([
                    tf.summary.scalar("total_loss", self.loss),
                    tf.summary.scalar("mel_loss", self.mel_loss),
                    tf.summary.scalar("linear_loss", self.linear_loss),
                    # the division suggests total_length is in seconds -- TODO confirm
                    tf.summary.scalar("total_length_hours",
                                      self.total_length / 3600)
                ])

                # Same losses under validation_* tags plus the decoder
                # alignments as an image. Note that this summary is only
                # created inside the is_training branch.
                self.validation_summary = tf.summary.merge([
                    tf.summary.scalar("validation_total_loss", self.loss),
                    tf.summary.scalar("validation_mel_loss", self.mel_loss),
                    tf.summary.scalar("validation_linear_loss",
                                      self.linear_loss),
                    tf.summary.image(
                        "v_alignment_matrix",
                        tf.expand_dims(self.decoder.alignments, 3))
                ])
Ejemplo n.º 22
0
class DataIterator:
    """Dataset iterator.

    Reads serialized samples from TFRecords files and turns each batch into
    encoded inputs plus sparse labels.
    """

    def __init__(self,
                 model_conf: ModelConfig,
                 mode: RunMode,
                 ran_captcha=None):
        """
        :param model_conf: project configuration
        :param mode: run mode (distinguishes training / validation)
        :param ran_captcha: optional generator object; its ``create()`` is
            used by ``generate_captcha`` to produce extra samples
        """
        self.model_conf = model_conf
        self.mode = mode
        self.path_map = {
            RunMode.Trains:
            self.model_conf.trains_path[DatasetType.TFRecords],
            RunMode.Validation:
            self.model_conf.validation_path[DatasetType.TFRecords]
        }
        self.batch_map = {
            RunMode.Trains: self.model_conf.batch_size,
            RunMode.Validation: self.model_conf.validation_batch_size
        }
        self.data_dir = self.path_map[mode]
        self.next_element = None
        self.image_path = []
        self.label_list = []
        self._label_list = []
        self._size = 0
        self.encoder = Encoder(self.model_conf, self.mode)
        self.ran_captcha = ran_captcha

    @staticmethod
    def parse_example(serial_example):
        """Parse one serialized example into its ``(input, label)`` string tensors."""
        features = tf.io.parse_single_example(
            serial_example,
            features={
                'label': tf.io.FixedLenFeature([], tf.string),
                'input': tf.io.FixedLenFeature([], tf.string),
            })
        _input = tf.cast(features['input'], tf.string)
        _label = tf.cast(features['label'], tf.string)

        return _input, _label

    @staticmethod
    def total_sample(file_name):
        """Count the records in ``file_name`` by iterating over all of them."""
        sample_nums = 0
        for _ in tf.compat.v1.python_io.tf_record_iterator(file_name):
            sample_nums += 1
        return sample_nums

    def read_sample_from_tfrecords(self, path):
        """
        Read samples from TFRecords and prepare ``self.next_element``.

        :param path: TFRecords file path, or a list of such paths
        :return: None
        """
        # Recount from scratch so that a repeated call does not add the list
        # case on top of a previous total (the single-path case already
        # overwrote it).
        paths = path if isinstance(path, list) else [path]
        self._size = sum(self.total_sample(p) for p in paths)

        min_after_dequeue = 1000
        batch = self.batch_map[self.mode]
        if self.model_conf.da_random_captcha['Enable']:
            # Read a smaller batch (between 2/3 and all of it); the rest is
            # filled with generated samples in generate_batch_by_tfrecords.
            batch = random.randint(int(batch / 3 * 2), batch)

        dataset_train = tf.data.TFRecordDataset(filenames=path,
                                                num_parallel_reads=20).map(
                                                    self.parse_example)
        dataset_train = dataset_train.shuffle(
            min_after_dequeue,
            reshuffle_each_iteration=True).prefetch(128).batch(
                batch, drop_remainder=True).repeat()
        iterator = tf.compat.v1.data.make_one_shot_iterator(dataset_train)
        self.next_element = iterator.get_next()

    @property
    def size(self):
        """Number of samples."""
        return self._size

    @property
    def labels(self):
        """Labels of the most recently generated batch."""
        return self.label_list

    @staticmethod
    def to_sparse(input_batch, label_batch):
        """Return the inputs unchanged and the labels converted to sparse form."""
        batch_inputs = input_batch
        batch_labels = utils.sparse.sparse_tuple_from_sequences(label_batch)
        return batch_inputs, batch_labels

    def generate_captcha(self, num) -> (list, list):
        """Generate up to ``num`` samples with ``self.ran_captcha``.

        A failing ``create()`` call is printed and skipped, so fewer than
        ``num`` samples may be returned.

        :return: ``(images, labels)``; each label is the joined label
            characters encoded to bytes
        """
        _images = []
        _labels = []
        for _ in range(num):
            try:
                image, labels, _font_type = self.ran_captcha.create()
                _images.append(image)
                _labels.append(''.join(labels).encode())
            except Exception as e:
                # Best effort: report and continue with the next sample.
                print(e)
        return _images, _labels

    def generate_batch_by_tfrecords(self, session):
        """Generate the current batch from TFRecords.

        Takes the current TensorFlow session and returns the inputs together
        with sparse labels.  Samples that cannot be encoded or that violate
        the configured label/channel constraints are logged and skipped.
        """
        # print(session.graph)
        batch = self.batch_map[self.mode]

        _input, _label = session.run(self.next_element)
        if self.model_conf.da_random_captcha['Enable']:
            remain_batch = batch - len(_label)
            # Only top up when something is actually missing.
            if remain_batch > 0:
                extra_input, extra_label = self.generate_captcha(remain_batch)
                _input = np.concatenate((_input, extra_input), axis=0)
                _label = np.concatenate((_label, extra_label), axis=0)

        input_batch = []
        label_batch = []
        for i1, i2 in zip(_input, _label):
            try:
                label_array = self.encoder.text(i2)
                if self.model_conf.model_field == ModelField.Image:
                    input_array = self.encoder.image(i1)
                else:
                    input_array = self.encoder.text(i1)

                if input_array is None:
                    tf.compat.v1.logging.warn(
                        "{}, Cannot identify image file labeled: {}, ignored.".
                        format(input_array, label_array))
                    continue

                # The checks below treat a str input as an error message and
                # a dict label as an invalid-charset report from the encoder.
                if isinstance(input_array, str):
                    tf.compat.v1.logging.warn(
                        "{}, \nInput errors labeled: {} [{}], ignored.".format(
                            input_array, i1, label_array))
                    continue
                if isinstance(label_array, dict):
                    # tf.logging.warn("The sample label {} contains invalid charset: {}.".format(
                    #     label_array['label'], label_array['char']
                    # ))
                    continue

                if input_array.shape[-1] != self.model_conf.image_channel:
                    # pass
                    tf.compat.v1.logging.warn(
                        "{}, \nInput shape: {}, ignored.".format(
                            self.model_conf.image_channel,
                            input_array.shape[-1]))
                    continue

                # True when the label length differs from the configured one.
                label_len_mismatch = len(
                    label_array) != self.model_conf.max_label_num
                using_cross_entropy = self.model_conf.loss_func == LossFunction.CrossEntropy
                if label_len_mismatch and using_cross_entropy and not self.model_conf.auto_padding:
                    tf.compat.v1.logging.warn(
                        "The number of labels must be fixed when using cross entropy, label: {}, "
                        "the number of tags is incorrect, ignored.".format(i2))
                    continue

                if len(
                        label_array
                ) > self.model_conf.max_label_num and using_cross_entropy:
                    tf.compat.v1.logging.warn(
                        "The number of label[{}] exceeds the maximum number of labels, ignored.{}"
                        .format(i2, label_array))
                    continue

                input_batch.append(input_array)
                label_batch.append(label_array)
            except OSError:
                # Dump the offending raw sample to a file for inspection.
                random_suffix = hashlib.md5(i1).hexdigest()
                file_format = EXCEPT_FORMAT_MAP[self.model_conf.model_field]
                with open(file="oserror_{}.{}".format(random_suffix,
                                                      file_format),
                          mode="wb") as f:
                    f.write(i1)
                tf.compat.v1.logging.warn("OSError [{}]".format(i2))
                continue

        # If the image size is not fixed, pad the current batch, using the
        # largest width as the maximum sequence length.
        if self.model_conf.model_field == ModelField.Image and self.model_conf.resize[
                0] == -1:
            input_batch = tf.keras.preprocessing.sequence.pad_sequences(
                sequences=input_batch,
                maxlen=None,
                dtype='float32',
                padding='post',
                truncating='post',
                value=0)

        self.label_list = label_batch
        return self.to_sparse(input_batch, self.label_list)
Ejemplo n.º 23
0
class VariationalDynamicalEncoder(object):
    def __init__(self,
                 params,
                 dim_x,
                 dim_z,
                 dim_recon,
                 p_x='bernoulli',
                 q_z='gaussian_marg',
                 p_z='gaussian_marg',
                 l2_loss=1e-6):
        """Store the configuration and build the TensorFlow graph.

        :param params: hyper-parameter object; this method reads ``n_batch``,
            ``batch_size``, ``n_time_steps``, ``d_inputs``, ``n_z``,
            ``pred_seq_len`` and ``n_classes`` from it and passes it to the
            Encoder, Decoder and Predict_trend constructors
        :param dim_x: stored on the instance, not otherwise used here
        :param dim_z: stored on the instance, not otherwise used here
        :param dim_recon: stored on the instance, not otherwise used here
        :param p_x: distribution name stored in ``self.distributions``
        :param q_z: distribution name stored in ``self.distributions``
        :param p_z: distribution name stored in ``self.distributions``
        :param l2_loss: stored as ``self.l2_loss``; used by ``_objective``
            as the weight of the L2 penalty
        """

        self.params, self.dim_x, self.dim_z, self.dim_recon = params, dim_x, dim_z, dim_recon

        # Only T, D, Tau_pred and C are used below; N, M and n_z are read
        # from params but not used in this method.
        N, M, T, D, n_z = self.params.n_batch, self.params.batch_size, self.params.n_time_steps, self.params.d_inputs, self.params.n_z
        Tau_pred, C = self.params.pred_seq_len, self.params.n_classes

        self.distributions = {'p_x': p_x, 'q_z': q_z, 'p_z': p_z}

        self.l2_loss = l2_loss
        ''' Create Graph '''

        # The model gets its own graph; everything below is created in it.
        self.G = tf.Graph()

        with self.G.as_default():
            # Input placeholder with a free leading dimension.
            self.x = tf.placeholder(tf.float32, [None, T, D])  # train M, T, D

            # Same shape as x; used by _objective as the target of the MSE loss.
            self.x_con = tf.placeholder(tf.float32, [None, T, D])

            # Labels for the prediction head; the name suggests one-hot
            # encoding over C classes -- TODO confirm.
            self.y_one_hot = tf.placeholder(tf.float32, [None, Tau_pred, C])

            self.encoder = Encoder(params=self.params)

            self.decoder = Decoder(params=self.params)

            self.predict = Predict_trend(params=self.params)

            # Build losses and evaluation ops, then the session and saver
            # bound to this graph.
            self._objective()
            self.session = tf.Session(graph=self.G)
            self.saver = tf.train.Saver()

    def _gen_sample(self, mu, log_sigma_sq):
        """Draw ``mu + exp(0.5 * log_sigma_sq) * eps`` with ``eps ~ N(0, 1)``.

        The noise tensor has the dynamic shape of ``mu``.
        """
        noise_shape = tf.shape(mu)
        noise = tf.random_normal(noise_shape, 0, 1)

        # exp(0.5 * log(sigma^2)) is the standard deviation.
        std_dev = tf.exp(0.5 * log_sigma_sq)
        scaled_noise = tf.multiply(std_dev, noise)

        return tf.add(mu, scaled_noise)

    def _generate_zx(self, x, reuse=False):
        """Encode ``x`` and sample a latent from the encoder output.

        Runs inside the ``'encoder'`` variable scope and returns
        ``(sample, mu, log_sigma_sq)``.
        """
        with tf.variable_scope('encoder', reuse=reuse):
            latent_mu, latent_log_var = self.encoder.output(x, reuse=reuse)
            latent = self._gen_sample(latent_mu, latent_log_var)

        return latent, latent_mu, latent_log_var

    def _generate_xz(self, z, reuse=False):
        """Decode ``z`` with ``self.decoder`` inside the ``'decoder'`` scope."""
        with tf.variable_scope('decoder', reuse=reuse):
            return self.decoder.output(z, reuse=reuse)

    def _generate_yz(self, z, reuse=False):
        """Run ``self.predict`` on ``z``.

        Note that the variable scope is named ``'decoder'``, the same scope
        name that ``_generate_xz`` uses.
        """
        with tf.variable_scope('decoder', reuse=reuse):
            return self.predict.output(z, reuse=reuse)

    def _objective(self):
        """Build the training cost, the accuracy metric and the evaluation ops.

        Sets ``self.cost`` to the sum of the KL-style latent loss, the MSE
        between ``self.x_hat`` and ``self.x_con``, the weighted L2 penalty and
        the softmax cross-entropy of the prediction head.  ``self.corr_loss``
        is computed as well but is not part of ``self.cost``.
        """

        ############
        ''' Cost '''
        ############

        # Encode x, decode the sample, then re-encode the reconstruction with
        # the same (reused) encoder variables.
        self.z_sample, self.z_mu, self.z_lsgms = self._generate_zx(self.x)

        self.x_hat = self._generate_xz(self.z_sample)

        self.z_tau, _, _ = self._generate_zx(self.x_hat, reuse=True)

        # if self.distributions['p_z'] == 'gaussian_marg':
        #     prior_z = tf.reduce_sum(utils.tf_gaussian_marg(self.z_mu, self.z_lsgms), 1)
        #
        # if self.distributions['q_z'] == 'gaussian_marg':
        #     post_z = tf.reduce_sum(utils.tf_gaussian_ent(self.z_lsgms), 1)
        #
        # if self.distributions['p_x'] == 'bernoulli':
        #     self.log_lik = - tf.reduce_sum(utils.tf_binary_xentropy(self.x, self.x_hat), 1)

        # L2 penalty over every trainable variable that exists at this point
        # (the prediction head is built further below).
        l2 = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()])

        # -0.5 * sum(1 + log_var - mu^2 - exp(log_var)) over axis 1, averaged.
        latent_cost = -0.5 * tf.reduce_sum(
            1 + self.z_lsgms - tf.square(self.z_mu) - tf.exp(self.z_lsgms),
            axis=1)
        latent_loss = tf.reduce_mean(latent_cost)

        # Moments over axis 0 of the sampled latent and of the re-encoded one.
        z_mean, z_var = tf.nn.moments(self.z_sample, axes=[0], keep_dims=True)
        z_tau_mean, z_tau_var = tf.nn.moments(self.z_tau,
                                              axes=[0],
                                              keep_dims=True)

        # Numerator/denominator of the correlation-style term used for
        # self.corr_loss below.
        num = tf.reduce_mean(tf.multiply((self.z_sample - z_mean),
                                         (self.z_tau - z_tau_mean)),
                             axis=[0, 1])
        den = tf.reduce_mean(tf.multiply(z_var, tf.transpose(z_tau_var)))

        self.y_pred = self._generate_yz(self.z_sample)  # _, T, C

        # # TODO format code into metric
        # eval_metric_ops = {
        #     "accuracy": tf.metrics.accuracy(labels=self.y_one_hot, predictions=self.predictions["classes"])
        # }
        #
        # self.predictions = {
        #     "classes": tf.argmax(input=self.y_pred, axis=2),
        #     "class_target": tf.argmax(input=self.y_one_hot, axis=2),
        #     "probabilities": tf.nn.softmax(self.y_pred, name="softmax")
        # }

        # tf.metrics.accuracy returns (value, update_op); only the second
        # element is kept here.
        _, self.accuracy = tf.metrics.accuracy(
            labels=tf.argmax(self.y_one_hot, 2),
            predictions=tf.argmax(self.y_pred, 2))

        # classify
        self.predict_loss = tf.losses.softmax_cross_entropy(
            onehot_labels=self.y_one_hot, logits=self.y_pred)

        # The small constant guards against division by zero.
        self.corr_loss = -num / (den + 1e-6)

        self.mse_loss = tf.losses.mean_squared_error(labels=self.x_con,
                                                     predictions=self.x_hat)

        # self.cost = tf.reduce_mean(post_z - prior_z) + self.corr_loss + self.mse_loss + self.l2_loss * l2

        # corr_loss is intentionally or accidentally left out of the active
        # cost -- NOTE(review): confirm which.
        self.cost = latent_loss + self.mse_loss + self.l2_loss * l2 + self.predict_loss

        ##################
        ''' Evaluation '''
        ##################

        # Separate forward pass that reuses the trained variables.
        self.z_sample_eval, _, _ = self._generate_zx(self.x, reuse=True)
        self.x_hat_eval = self._generate_xz(self.z_sample_eval, reuse=True)

        self.eval_log_lik = -tf.reduce_mean(
            tf.reduce_sum(utils.tf_binary_xentropy(self.x, self.x_hat_eval),
                          1))

    def train(self,
              x,
              x_con,
              x_valid,
              x_con_valid,
              epochs,
              num_batches,
              print_every=1,
              learning_rate=3e-4,
              beta1=0.9,
              beta2=0.999,
              seed=31415,
              stop_iter=100,
              y_one_hot=None,
              save_path=None,
              load_path=None):
        """Optimise ``self.cost`` with norm-clipped Adam and MSE-based early stopping.

        Args:
            x: Training inputs fed to ``self.x``; ``x.shape[0]`` is used as the
                number of examples.
            x_con: Array fed to ``self.x_con`` alongside ``x``.
            x_valid: Accepted for interface compatibility; not used here.
            x_con_valid: Accepted for interface compatibility; not used here.
            epochs: Maximum number of passes over the training data.
            num_batches: Number of mini-batches per epoch; must divide the
                number of examples exactly.
            print_every: The MSE check, checkpointing and metric printing run
                on epochs where ``epoch % print_every == 0``.
            learning_rate: Adam learning rate.
            beta1: Adam ``beta1``.
            beta2: Adam ``beta2``.
            seed: Seed applied to both NumPy and TensorFlow.
            stop_iter: Training stops once this many consecutive epochs have
                passed without an improvement of the monitored MSE.
            y_one_hot: Labels handed to ``utils.feed_numpy`` as ``y`` and fed
                to ``self.y_one_hot`` batch by batch.
            save_path: Prefix prepended verbatim (no path separator is added)
                to the checkpoint file name; ``None`` selects ``checkpoints/``.
            load_path: ``'default'`` restores the latest checkpoint found under
                ``save_path``; any other non-``None`` value is a directory
                passed to ``tf.train.latest_checkpoint``.

        Raises:
            AssertionError: If the number of examples is not a multiple of
                ``num_batches``.
        """
        self.num_examples = x.shape[0]
        self.num_batches = num_batches

        assert self.num_examples % self.num_batches == 0, '#Examples % #Batches != 0'

        self.batch_size = self.num_examples // self.num_batches

        # Session and summary
        self.save_path = save_path
        if save_path is None:
            self.save_ckpt_path = 'checkpoints/model_VAE_{}-{}_{}.cpkt'.format(
                learning_rate, self.batch_size, time.time())
        else:
            self.save_ckpt_path = save_path + 'model_VAE_{}-{}_{}.cpkt'.format(
                learning_rate, self.batch_size, time.time())

        np.random.seed(seed)
        tf.set_random_seed(seed)

        with self.G.as_default():

            self.optimizer_origin = tf.train.AdamOptimizer(
                learning_rate=learning_rate, beta1=beta1, beta2=beta2)
            # Wrap Adam so that gradients are clipped to a norm of 1.0.
            self.optimizer = tf.contrib.estimator.clip_gradients_by_norm(
                self.optimizer_origin, clip_norm=1.0)
            self.train_op = self.optimizer.minimize(self.cost)

            self.init_g = tf.global_variables_initializer()
            self.init_l = tf.local_variables_initializer()
            self._test_vars = None

        # NOTE(review): if self.session is a tf.Session, leaving this `with`
        # block closes it, so encode()/decode() afterwards would need a fresh
        # session - confirm against the callers.
        with self.session as sess:

            sess.run(self.init_g)
            sess.run(self.init_l)

            if load_path == 'default':
                full_path = tf.train.latest_checkpoint(self.save_path)
                print('restore model from {}'.format(full_path))
                self.saver.restore(sess, full_path)

            elif load_path is not None:
                full_path = tf.train.latest_checkpoint(load_path)
                print('restore model from {}'.format(full_path))
                self.saver.restore(sess, full_path)

            best_eval_log_lik = -np.inf
            best_mse = np.inf

            stop_counter = 0

            for epoch in range(epochs):

                # TODO create shuffle Data

                # Training
                training_cost, accuracy = 0, 0
                for x_batch, x_con_batch, y_one_hot_batch in utils.feed_numpy(
                        self.batch_size, x, x_con, y=y_one_hot):
                    training_result = sess.run(
                        [self.train_op, self.cost, self.accuracy],
                        feed_dict={
                            self.x: x_batch,
                            self.x_con: x_con_batch,
                            self.y_one_hot: y_one_hot_batch
                        })

                    training_cost += training_result[1]
                    accuracy += training_result[2]

                training_cost, accuracy = training_cost / self.num_batches, accuracy / self.num_batches

                # Evaluation
                stop_counter += 1

                if epoch % print_every == 0:

                    # The monitored quantity is the MSE over the full (x, x_con)
                    # arrays passed in for training, not over x_valid.
                    mse = sess.run(self.mse_loss,
                                   feed_dict={
                                       self.x: x,
                                       self.x_con: x_con
                                   })

                    if mse < best_mse:
                        # Remember the best value; without this update every
                        # evaluation counted as an improvement, so a checkpoint
                        # was written each time and early stopping never fired.
                        best_mse = mse
                        # Reported in the stopping message below.
                        best_eval_log_lik = mse
                        self.saver.save(sess, self.save_ckpt_path)
                        stop_counter = 0

                    utils.print_metrics(epoch + 1,
                                        ['Training', ' cost', training_cost],
                                        ['Accuracy', 'train', accuracy],
                                        ['MSE    ', ' train', mse])

                if stop_counter >= stop_iter:
                    print(
                        'Stopping No change in validation log-likelihood for {} iterations'
                        .format(stop_iter))
                    print('Best validation log-likelihood: {}'.format(
                        best_eval_log_lik))
                    print('Model saved in {}'.format(self.save_path))
                    break

    def encode(self, x, sample=False):
        """Run the encoder tensors on ``x`` in ``self.session``.

        Args:
            x: Value fed to the ``self.x`` placeholder.
            sample: When true, ``self.z_sample`` is fetched as well and comes
                first in the result.

        Returns:
            Whatever ``self.session.run`` returns for the fetch list
            ``[z_mu, z_lsgms]``, or ``[z_sample, z_mu, z_lsgms]`` when
            ``sample`` is true.
        """
        fetches = [self.z_mu, self.z_lsgms]
        if sample:
            fetches.insert(0, self.z_sample)
        return self.session.run(fetches, feed_dict={self.x: x})

    def decode(self, z):
        """Evaluate ``self.x_hat`` with ``z`` fed in place of ``self.z_sample``.

        Returns:
            The result of ``self.session.run`` for the one-element fetch list
            ``[self.x_hat]``.
        """
        latent_feed = {self.z_sample: z}
        fetches = [self.x_hat]
        return self.session.run(fetches, feed_dict=latent_feed)
Ejemplo n.º 24
0
    def initialize(self, train_data):
        """Build the embedding store, encoders, decoder and optimizer.

        Sequence-length limits and vocabulary frequency thresholds are derived
        from statistics over ``train_data``; all other sizes come from the
        module-level hyperparameter constants.

        Args:
            train_data: Iterable of examples exposing
                ``span_minimal_diff_comment_tokens``, ``old_comment_tokens``
                and ``span_diff_code_tokens`` token lists.
        """
        nl_lengths = []
        code_lengths = []
        nl_token_counter = Counter()
        code_token_counter = Counter()

        for ex in train_data:
            # Target edit sequence, wrapped in the START/END markers.
            trg_sequence = [START] + ex.span_minimal_diff_comment_tokens + [END]
            nl_token_counter.update(trg_sequence)
            nl_lengths.append(len(trg_sequence))

            old_nl_sequence = ex.old_comment_tokens
            nl_token_counter.update(old_nl_sequence)
            nl_lengths.append(len(old_nl_sequence))

            code_sequence = ex.span_diff_code_tokens
            code_token_counter.update(code_sequence)
            code_lengths.append(len(code_sequence))

        # Length caps are percentiles of the observed lengths
        # (np.percentile does not need pre-sorted input).
        self.max_nl_length = int(
            np.percentile(np.asarray(nl_lengths), LENGTH_CUTOFF_PCT))
        self.max_code_length = int(
            np.percentile(np.asarray(code_lengths), LENGTH_CUTOFF_PCT))
        self.max_vocab_extension = self.max_nl_length + self.max_code_length

        nl_counts = np.asarray(list(nl_token_counter.values()))
        nl_threshold = int(np.percentile(nl_counts, VOCAB_CUTOFF_PCT)) + 1
        code_counts = np.asarray(list(code_token_counter.values()))
        # The code threshold must come from the code token counts; it used to
        # be derived from nl_counts, leaving code_counts unused.
        code_threshold = int(np.percentile(code_counts, VOCAB_CUTOFF_PCT)) + 1

        self.embedding_store = EmbeddingStore(nl_threshold, NL_EMBEDDING_SIZE,
                                              nl_token_counter, code_threshold,
                                              CODE_EMBEDDING_SIZE,
                                              code_token_counter, DROPOUT_RATE,
                                              True)

        self.code_encoder = Encoder(CODE_EMBEDDING_SIZE, HIDDEN_SIZE,
                                    NUM_LAYERS, DROPOUT_RATE)
        self.nl_encoder = Encoder(NL_EMBEDDING_SIZE, HIDDEN_SIZE, NUM_LAYERS,
                                  DROPOUT_RATE)
        self.decoder = UpdateDecoder(NL_EMBEDDING_SIZE, DECODER_HIDDEN_SIZE,
                                     2 * HIDDEN_SIZE, self.embedding_store,
                                     NL_EMBEDDING_SIZE, DROPOUT_RATE)
        # Matrix applied to the concatenated encoder final states to obtain the
        # decoder's initial state.
        self.encoder_final_to_decoder_initial = nn.Parameter(
            torch.randn(2 * NUM_ENCODERS * HIDDEN_SIZE,
                        DECODER_HIDDEN_SIZE,
                        dtype=torch.float,
                        requires_grad=True))

        # Linear maps from [token embedding ; extra features] back to the
        # embedding size.
        self.code_features_to_embedding = nn.Linear(CODE_EMBEDDING_SIZE +
                                                    NUM_CODE_FEATURES,
                                                    CODE_EMBEDDING_SIZE,
                                                    bias=False)
        self.nl_features_to_embedding = nn.Linear(NL_EMBEDDING_SIZE +
                                                  NUM_NL_FEATURES,
                                                  NL_EMBEDDING_SIZE,
                                                  bias=False)

        # Created last so that it sees every parameter registered above.
        self.optimizer = torch.optim.Adam(self.parameters(), lr=LR)
Ejemplo n.º 25
0
class CommentUpdateModel(nn.Module):
    """Edit model which learns to map a sequence of code edits to a sequence of comment edits and then applies the edits to the
       old comment in order to produce an updated comment."""
    def __init__(self, model_path):
        super(CommentUpdateModel, self).__init__()
        self.model_path = model_path
        self.torch_device_name = 'cpu'

    def initialize(self, train_data):
        """Build the embedding store, encoders, decoder and optimizer.

        Sequence-length limits and vocabulary frequency thresholds are derived
        from statistics over ``train_data``; all other sizes come from the
        module-level hyperparameter constants.

        Args:
            train_data: Iterable of examples exposing
                ``span_minimal_diff_comment_tokens``, ``old_comment_tokens``
                and ``span_diff_code_tokens`` token lists.
        """
        nl_lengths = []
        code_lengths = []
        nl_token_counter = Counter()
        code_token_counter = Counter()

        for ex in train_data:
            # Target edit sequence, wrapped in the START/END markers.
            trg_sequence = [START] + ex.span_minimal_diff_comment_tokens + [END]
            nl_token_counter.update(trg_sequence)
            nl_lengths.append(len(trg_sequence))

            old_nl_sequence = ex.old_comment_tokens
            nl_token_counter.update(old_nl_sequence)
            nl_lengths.append(len(old_nl_sequence))

            code_sequence = ex.span_diff_code_tokens
            code_token_counter.update(code_sequence)
            code_lengths.append(len(code_sequence))

        # Length caps are percentiles of the observed lengths
        # (np.percentile does not need pre-sorted input).
        self.max_nl_length = int(
            np.percentile(np.asarray(nl_lengths), LENGTH_CUTOFF_PCT))
        self.max_code_length = int(
            np.percentile(np.asarray(code_lengths), LENGTH_CUTOFF_PCT))
        self.max_vocab_extension = self.max_nl_length + self.max_code_length

        nl_counts = np.asarray(list(nl_token_counter.values()))
        nl_threshold = int(np.percentile(nl_counts, VOCAB_CUTOFF_PCT)) + 1
        code_counts = np.asarray(list(code_token_counter.values()))
        # The code threshold must come from the code token counts; it used to
        # be derived from nl_counts, leaving code_counts unused.
        code_threshold = int(np.percentile(code_counts, VOCAB_CUTOFF_PCT)) + 1

        self.embedding_store = EmbeddingStore(nl_threshold, NL_EMBEDDING_SIZE,
                                              nl_token_counter, code_threshold,
                                              CODE_EMBEDDING_SIZE,
                                              code_token_counter, DROPOUT_RATE,
                                              True)

        self.code_encoder = Encoder(CODE_EMBEDDING_SIZE, HIDDEN_SIZE,
                                    NUM_LAYERS, DROPOUT_RATE)
        self.nl_encoder = Encoder(NL_EMBEDDING_SIZE, HIDDEN_SIZE, NUM_LAYERS,
                                  DROPOUT_RATE)
        self.decoder = UpdateDecoder(NL_EMBEDDING_SIZE, DECODER_HIDDEN_SIZE,
                                     2 * HIDDEN_SIZE, self.embedding_store,
                                     NL_EMBEDDING_SIZE, DROPOUT_RATE)
        # Matrix applied to the concatenated encoder final states to obtain the
        # decoder's initial state (see get_encoder_output).
        self.encoder_final_to_decoder_initial = nn.Parameter(
            torch.randn(2 * NUM_ENCODERS * HIDDEN_SIZE,
                        DECODER_HIDDEN_SIZE,
                        dtype=torch.float,
                        requires_grad=True))

        # Linear maps from [token embedding ; extra features] back to the
        # embedding size.
        self.code_features_to_embedding = nn.Linear(CODE_EMBEDDING_SIZE +
                                                    NUM_CODE_FEATURES,
                                                    CODE_EMBEDDING_SIZE,
                                                    bias=False)
        self.nl_features_to_embedding = nn.Linear(NL_EMBEDDING_SIZE +
                                                  NUM_NL_FEATURES,
                                                  NL_EMBEDDING_SIZE,
                                                  bias=False)

        # Created last so that it sees every parameter registered above.
        self.optimizer = torch.optim.Adam(self.parameters(), lr=LR)

    def get_batches(self, dataset, shuffle=False):
        """Split ``dataset`` into tensorized batches of at most BATCH_SIZE examples.

        Args:
            dataset: List of examples. When ``shuffle`` is true it is shuffled
                IN PLACE, so the caller's list ends up in the same order as
                the returned batches.
            shuffle: Whether to shuffle ``dataset`` before batching.

        Returns:
            A list of ``UpdateBatchData`` whose tensors live on
            ``self.get_device()``; empty when ``dataset`` is empty.
        """
        if shuffle:
            random.shuffle(dataset)

        device = self.get_device()
        store = self.embedding_store
        nl_vocab_size = len(store.nl_vocabulary)
        # Ids in [nl_vocab_size, max_limit) are handed out per example to
        # out-of-vocabulary input tokens so that they can still be copied.
        max_limit = nl_vocab_size + self.max_vocab_extension

        def to_tensor(values, dtype):
            """Tensorize one batch field on the model's device."""
            return torch.tensor(values, dtype=dtype, device=device)

        batches = []
        for start_idx in range(0, len(dataset), BATCH_SIZE):
            end_idx = min(start_idx + BATCH_SIZE, len(dataset))

            code_token_ids = []
            code_lengths = []
            old_nl_token_ids = []
            old_nl_lengths = []
            trg_token_ids = []
            trg_extended_token_ids = []
            trg_lengths = []
            invalid_copy_positions = []
            inp_str_reps = []
            inp_ids = []
            code_features = []
            nl_features = []

            for i in range(start_idx, end_idx):
                example = dataset[i]

                code_sequence = example.span_diff_code_tokens
                code_length = min(len(code_sequence), self.max_code_length)
                code_token_ids.append(
                    store.get_padded_code_ids(code_sequence,
                                              self.max_code_length))
                code_lengths.append(code_length)

                old_nl_sequence = example.old_comment_tokens
                old_nl_length = min(len(old_nl_sequence), self.max_nl_length)
                old_nl_token_ids.append(
                    store.get_padded_nl_ids(old_nl_sequence,
                                            self.max_nl_length))
                old_nl_lengths.append(old_nl_length)

                # Per-example view of the copyable inputs (truncated code
                # tokens followed by truncated old-comment tokens) and the id
                # each one maps to.
                ex_inp_str_reps = []
                ex_inp_ids = []
                extended_ids = {}  # OOV token -> extended id given at first sight
                extra_counter = nl_vocab_size

                copy_inputs = (code_sequence[:code_length] +
                               old_nl_sequence[:old_nl_length])
                for token in copy_inputs:
                    nl_id = store.get_nl_id(token)
                    # Once the extension budget is exhausted the token keeps
                    # the id returned by get_nl_id, even if it was seen before.
                    if store.is_nl_unk(nl_id) and extra_counter < max_limit:
                        if token in extended_ids:
                            nl_id = extended_ids[token]
                        else:
                            nl_id = extra_counter
                            extended_ids[token] = nl_id
                            extra_counter += 1

                    ex_inp_str_reps.append(token)
                    ex_inp_ids.append(nl_id)

                trg_sequence = (
                    [START] + example.span_minimal_diff_comment_tokens + [END])
                trg_token_ids.append(
                    store.get_padded_nl_ids(trg_sequence, self.max_nl_length))
                trg_extended_token_ids.append(
                    store.get_extended_padded_nl_ids(trg_sequence,
                                                     self.max_nl_length,
                                                     ex_inp_ids,
                                                     ex_inp_str_reps))
                trg_lengths.append(min(len(trg_sequence), self.max_nl_length))
                inp_str_reps.append(ex_inp_str_reps)
                inp_ids.append(ex_inp_ids)

                invalid_copy_positions.append(
                    get_invalid_copy_locations(ex_inp_str_reps,
                                               self.max_vocab_extension,
                                               trg_sequence,
                                               self.max_nl_length))
                code_features.append(
                    get_code_features(code_sequence, example,
                                      self.max_code_length))
                nl_features.append(
                    get_nl_features(old_nl_sequence, example,
                                    self.max_nl_length))

            # Positional order must match the UpdateBatchData field order.
            batches.append(
                UpdateBatchData(
                    to_tensor(code_token_ids, torch.int64),
                    to_tensor(code_lengths, torch.int64),
                    to_tensor(old_nl_token_ids, torch.int64),
                    to_tensor(old_nl_lengths, torch.int64),
                    to_tensor(trg_token_ids, torch.int64),
                    to_tensor(trg_extended_token_ids, torch.int64),
                    to_tensor(trg_lengths, torch.int64),
                    to_tensor(invalid_copy_positions, torch.uint8),
                    inp_str_reps,
                    inp_ids,
                    to_tensor(code_features, torch.float32),
                    to_tensor(nl_features, torch.float32)))
        return batches

    def get_encoder_output(self, batch_data):
        """Encode the code edits and the old comment of a batch.

        Returns:
            A 7-tuple ``(encoder_hidden_states, encoder_final_state, mask,
            code_hidden_states, old_nl_hidden_states, code_masks,
            old_nl_masks)``. Each mask has a singleton dimension inserted at
            index 1 and is true at positions at or beyond the corresponding
            sequence length.
        """
        device = self.get_device()
        store = self.embedding_store

        def length_mask(hidden_states, lengths):
            # True where the position index is >= the sequence length.
            positions = torch.arange(hidden_states.shape[1],
                                     device=device).view(1, -1)
            return (positions >= lengths.view(-1, 1)).unsqueeze(1)

        # Code side: token embeddings concatenated with per-token features.
        code_inputs = torch.cat(
            [store.get_code_embeddings(batch_data.code_ids),
             batch_data.code_features],
            dim=-1)
        code_embedded_tokens = self.code_features_to_embedding(code_inputs)
        code_hidden_states, code_final_state = self.code_encoder.forward(
            code_embedded_tokens, batch_data.code_lengths, device)

        # Old-comment side, built the same way.
        old_nl_inputs = torch.cat(
            [store.get_nl_embeddings(batch_data.old_nl_ids),
             batch_data.nl_features],
            dim=-1)
        old_nl_embedded_tokens = self.nl_features_to_embedding(old_nl_inputs)
        old_nl_hidden_states, old_nl_final_state = self.nl_encoder.forward(
            old_nl_embedded_tokens, batch_data.old_nl_lengths, device)

        encoder_hidden_states, input_lengths = merge_encoder_outputs(
            code_hidden_states, batch_data.code_lengths, old_nl_hidden_states,
            batch_data.old_nl_lengths, device)

        # Project the concatenated final states to the decoder's initial state.
        joint_final_state = torch.cat([code_final_state, old_nl_final_state],
                                      dim=-1)
        encoder_final_state = torch.einsum(
            'bd,dh->bh', joint_final_state,
            self.encoder_final_to_decoder_initial)

        mask = length_mask(encoder_hidden_states, input_lengths)
        code_masks = length_mask(code_hidden_states, batch_data.code_lengths)
        old_nl_masks = length_mask(old_nl_hidden_states,
                                   batch_data.old_nl_lengths)

        return (encoder_hidden_states, encoder_final_state, mask,
                code_hidden_states, old_nl_hidden_states, code_masks,
                old_nl_masks)

    def forward(self, batch_data):
        """Return the length-normalised negative log-likelihood of the gold targets.

        The decoder is teacher-forced: it is fed the gold target tokens shifted
        by one position, and each step is scored by combining (log-sum-exp) the
        generation log-probability of the gold token with the log-probability
        mass of the copy positions not flagged in
        ``batch_data.invalid_copy_positions``.
        """
        (encoder_hidden_states, initial_state, inp_length_mask,
         code_hidden_states, old_nl_hidden_states, code_masks,
         old_nl_masks) = self.get_encoder_output(batch_data)

        trg_ids = batch_data.trg_nl_ids
        # Decoder inputs are all target tokens but the last ...
        decoder_input_embeddings = self.embedding_store.get_nl_embeddings(
            trg_ids)[:, :-1]
        _, _, generation_logprobs, copy_logprobs = self.decoder.forward(
            initial_state, decoder_input_embeddings, encoder_hidden_states,
            code_hidden_states, old_nl_hidden_states, inp_length_mask,
            code_masks, old_nl_masks)

        # ... and the gold outputs are all target tokens but the first.
        gold_ids = trg_ids[:, 1:]
        gold_generation_logprobs = generation_logprobs.gather(
            -1, gold_ids.unsqueeze(-1)).squeeze(-1)

        num_inputs = encoder_hidden_states.shape[1]
        invalid_positions = batch_data.invalid_copy_positions[:, 1:, :num_inputs]
        gold_copy_logprobs = copy_logprobs.masked_fill(
            invalid_positions, float('-inf')).logsumexp(dim=-1)

        gold_logprobs = torch.stack(
            [gold_generation_logprobs, gold_copy_logprobs],
            dim=-1).logsumexp(dim=-1)

        # Zero out the steps at or beyond each example's (length - 1).
        num_steps = batch_data.trg_nl_lengths - 1
        step_index = torch.arange(gold_ids.shape[-1],
                                  device=self.get_device()).unsqueeze(0)
        gold_logprobs = gold_logprobs.masked_fill(
            step_index >= num_steps.unsqueeze(-1), 0)

        # Normalizing by length. Seems to help
        likelihood_by_example = gold_logprobs.sum(dim=-1) / num_steps.float()

        return -likelihood_by_example.mean()

    def beam_decode(self, batch_data):
        """Run the decoder's beam search and map the predicted ids back to tokens.

        Returns:
            One list per example in the batch; each holds ``(tokens, score)``
            tuples, one per candidate returned by the decoder for that example.
        """
        (encoder_hidden_states, initial_state, inp_length_mask,
         code_hidden_states, old_nl_hidden_states, code_masks,
         old_nl_masks) = self.get_encoder_output(batch_data)
        predictions, scores = self.decoder.beam_decode(
            initial_state, encoder_hidden_states, code_hidden_states,
            old_nl_hidden_states, inp_length_mask, self.max_nl_length,
            batch_data, code_masks, old_nl_masks, self.get_device())

        store = self.embedding_store
        decoded_output = []
        for i in range(initial_state.shape[0]):
            # The example's own input ids/strings are passed along so that the
            # store can resolve ids in the per-example extended vocabulary.
            candidates = [
                (store.get_nl_tokens(token_ids, batch_data.input_ids[i],
                                     batch_data.input_str_reps[i]),
                 scores[i][j])
                for j, token_ids in enumerate(predictions[i])
            ]
            decoded_output.append(candidates)
        return decoded_output

    def get_device(self):
        """Returns the proper device."""
        if self.torch_device_name == 'gpu':
            return torch.device('cuda')
        else:
            return torch.device('cpu')

    def run_gradient_step(self, batch_data):
        """Performs gradient step."""
        self.optimizer.zero_grad()
        loss = self.forward(batch_data)
        loss.backward()
        self.optimizer.step()
        return float(loss.cpu())

    def run_train(self, train_data, valid_data):
        """Train for up to MAX_EPOCHS epochs with patience-based early stopping.

        After each epoch the mean loss over the validation batches is computed.
        The whole model is saved to ``self.model_path`` whenever that loss is
        no worse than the best seen so far; training ends once it has failed
        to improve for more than PATIENCE consecutive epochs.
        """
        valid_batches = self.get_batches(valid_data)
        train_batches = self.get_batches(train_data, shuffle=True)

        best_loss = float('inf')
        patience_tally = 0

        for epoch in range(MAX_EPOCHS):
            if patience_tally > PATIENCE:
                print('Terminating')
                break

            # One pass over the training batches, visited in a fresh order.
            self.train()
            random.shuffle(train_batches)
            train_loss = sum(
                self.run_gradient_step(batch) for batch in train_batches)

            # Validation pass without gradient tracking.
            self.eval()
            with torch.no_grad():
                validation_loss = sum(
                    float(self.forward(batch).cpu())
                    for batch in valid_batches)
            validation_loss = validation_loss / len(valid_batches)

            improved = validation_loss <= best_loss
            if improved:
                torch.save(self, self.model_path)
                best_loss = validation_loss
                patience_tally = 0
            else:
                patience_tally += 1

            print('Epoch: {}'.format(epoch))
            print('Training loss: {}'.format(train_loss / len(train_batches)))
            print('Validation loss: {}'.format(validation_loss))
            if improved:
                print('Saved')
            print('-----------------------------------')

    def get_likelihood_scores(self, comment_generation_model,
                              formatted_beam_predictions, test_example):
        """Score each beam candidate with the comment generation model.

        Every candidate is wrapped in an ``Example`` that reuses the old
        comment and the code fields of ``test_example`` and carries the
        candidate as its fourth (space-joined string) and fifth (token list)
        arguments.

        Returns:
            ``np.ndarray`` of the values produced by
            ``comment_generation_model.compute_generation_likelihood``.
        """
        batch_examples = [
            Example(test_example.id, test_example.old_comment,
                    test_example.old_comment_tokens,
                    ' '.join(candidate_tokens), candidate_tokens,
                    test_example.old_code, test_example.old_code_tokens,
                    test_example.new_code, test_example.new_code_tokens)
            for candidate_tokens in formatted_beam_predictions
        ]

        # NOTE: only the first batch is scored, so the candidates are expected
        # to fit into a single batch of the generation model.
        first_batch = comment_generation_model.get_batches(batch_examples)[0]
        likelihoods = comment_generation_model.compute_generation_likelihood(
            first_batch)
        return np.asarray(likelihoods)

    def get_generation_model(self):
        """Load the pre-trained comment generation model used for re-ranking.

        The model is read from the hard-coded FULL_GENERATION_MODEL_PATH (which
        may need to be adjusted), placed on the CPU and switched to eval mode.

        NOTE: ``torch.load`` unpickles the stored object, so only load
        checkpoints from a trusted source.
        NOTE(review): recent PyTorch releases default ``torch.load`` to
        ``weights_only=True``, which presumably rejects a fully pickled
        module like this one - confirm against the PyTorch version in use.

        Returns:
            The loaded model, on CPU and in eval mode.
        """
        # map_location lets a checkpoint that was saved on a GPU be loaded on
        # a CPU-only machine; the model is run on the CPU here anyway.
        comment_generation_model = torch.load(FULL_GENERATION_MODEL_PATH,
                                              map_location='cpu')
        comment_generation_model.torch_device_name = 'cpu'
        # Module.cpu() already recurses into all child modules.
        comment_generation_model.cpu()
        comment_generation_model.eval()
        return comment_generation_model

    def run_evaluation(self, test_data, rerank):
        """Predict updated comments for ``test_data`` and print evaluation metrics.

        Args:
            test_data: Indexable collection of test examples.
            rerank: When false, the first beam candidate of each example is
                used. When true, the candidates are re-scored with a weighted
                sum of the beam score, the generation-model likelihood and the
                METEOR score against the old comment, and the best one is kept.

        Side effects:
            Prints every example together with its prediction, writes the
            predictions to ``<model path without extension>[_rerank].txt`` and
            the sources/references to ``src.txt`` / ``ref.txt`` in the current
            directory, then prints accuracy, BLEU, METEOR, SARI and GLEU.
        """
        import os  # local import: only needed to derive the prediction file name

        self.eval()

        test_batches = self.get_batches(test_data)
        test_predictions = []

        gold_strs = []
        pred_strs = []
        src_strs = []

        references = []
        pred_instances = []

        with torch.no_grad():
            for batch_data in test_batches:
                test_predictions.extend(self.beam_decode(batch_data))

        if not rerank:
            # Keep the token sequence of the first beam candidate.
            test_predictions = [pred[0][0] for pred in test_predictions]
        else:
            comment_generation_model = self.get_generation_model()
            # NOTE(review): these greedy generations are collected but not
            # consulted by the scoring below; kept so that the calls made on
            # the generation model stay the same.
            generation_predictions = []
            with torch.no_grad():
                generation_test_batches = comment_generation_model.get_batches(
                    test_data)
                for gen_batch_data in generation_test_batches:
                    generation_predictions.extend(
                        comment_generation_model.greedy_decode(gen_batch_data))

            reranked_predictions = []
            for i, beam in enumerate(test_predictions):
                old_comment_tokens = test_data[i].old_comment_tokens
                formatted_beam_predictions = []
                # np.float64 rather than the `np.float` alias, which NumPy
                # 1.24 removed.
                model_scores = np.zeros(len(beam), dtype=np.float64)

                for b, (b_pred, b_score) in enumerate(beam):
                    b_pred_str = diff_utils.format_minimal_diff_spans(
                        old_comment_tokens, b_pred)
                    formatted_beam_predictions.append(b_pred_str.split(' '))
                    model_scores[b] = b_score

                likelihood_scores = self.get_likelihood_scores(
                    comment_generation_model, formatted_beam_predictions,
                    test_data[i])
                old_meteor_scores = compute_sentence_meteor(
                    [[old_comment_tokens]
                     for _ in formatted_beam_predictions],
                    formatted_beam_predictions)

                rerank_scores = [
                    (model_scores[j] * MODEL_LAMBDA) +
                    (likelihood_scores[j] * LIKELIHOOD_LAMBDA) +
                    (old_meteor_scores[j] * OLD_METEOR_LAMBDA)
                    for j in range(len(formatted_beam_predictions))
                ]

                # Highest combined score first.
                sorted_indices = np.argsort(-np.asarray(rerank_scores))
                reranked_predictions.append(beam[sorted_indices[0]][0])

            test_predictions = reranked_predictions

        for i, raw_prediction in enumerate(test_predictions):
            example = test_data[i]
            pred_str = diff_utils.format_minimal_diff_spans(
                example.old_comment_tokens, raw_prediction)
            prediction = pred_str.split()
            gold_str = example.new_comment

            gold_strs.append(gold_str)
            pred_strs.append(pred_str)
            src_strs.append(example.old_comment)

            references.append([example.new_comment_tokens])
            pred_instances.append(prediction)

            print('Old comment: {}'.format(example.old_comment))
            print('Gold comment: {}'.format(gold_str))
            print('Predicted comment: {}'.format(pred_str))
            print('Raw prediction: {}\n'.format(' '.join(raw_prediction)))
            try:
                print('Old code:\n{}\n'.format(get_old_code(example)))
            except Exception:
                # Best effort only; do not swallow KeyboardInterrupt/SystemExit.
                print('Failed to print old code\n')
            print('New code:\n{}\n'.format(get_new_code(example)))
            print('----------------------------')

        # splitext strips only the final extension, so paths such as
        # './model.pt' keep their stem (split('.')[0] returned '' for them).
        model_stem = os.path.splitext(self.model_path)[0]
        if rerank:
            prediction_file = '{}_rerank.txt'.format(model_stem)
        else:
            prediction_file = '{}.txt'.format(model_stem)

        write_predictions(pred_strs, prediction_file)
        write_predictions(src_strs, 'src.txt')
        write_predictions(gold_strs, 'ref.txt')

        predicted_accuracy = compute_accuracy(gold_strs, pred_strs)
        predicted_bleu = compute_bleu(references, pred_instances)
        predicted_meteor = compute_meteor(references, pred_instances)
        predicted_sari = compute_sari(test_data, pred_instances)
        predicted_gleu = compute_gleu(test_data, 'src.txt', 'ref.txt',
                                      prediction_file)

        print('Predicted Accuracy: {}'.format(predicted_accuracy))
        print('Predicted BLEU: {}'.format(predicted_bleu))
        print('Predicted Meteor: {}'.format(predicted_meteor))
        print('Predicted SARI: {}'.format(predicted_sari))
        print('Predicted GLEU: {}\n'.format(predicted_gleu))
Ejemplo n.º 26
0
    def __init__(self, args):
        """Build the whole training graph: inputs, networks, losses, optimizers, summaries.

        Args:
            args: Object exposing the attributes read below: ``log_step``,
                ``batch_size``, ``image_size``, ``latent_dim``, the
                ``coeff_*`` loss weights, ``instance_normalization`` and
                ``use_resnet``.
        """
        self._log_step = args.log_step
        self._batch_size = args.batch_size
        self._image_size = args.image_size
        self._latent_dim = args.latent_dim
        self._coeff_gan = args.coeff_gan
        self._coeff_vae = args.coeff_vae
        self._coeff_reconstruct = args.coeff_reconstruct
        self._coeff_latent = args.coeff_latent
        self._coeff_kl = args.coeff_kl
        self._norm = 'instance' if args.instance_normalization else 'batch'
        self._use_resnet = args.use_resnet

        # Images are first resized to this larger size (30 px margin for
        # 256-px images, 15 px otherwise) and then randomly cropped back to
        # `_image_size` inside augment_image().
        self._augment_size = self._image_size + (30 if self._image_size == 256
                                                 else 15)
        self._image_shape = [self._image_size, self._image_size, 3]

        # Values fed at run time.
        self.is_train = tf.placeholder(tf.bool, name='is_train')
        self.lr = tf.placeholder(tf.float32, name='lr')
        self.global_step = tf.train.get_or_create_global_step(graph=None)

        # Each placeholder is kept both as a local (rebound below by the
        # augmentation step) and as an attribute (the original feed target).
        image_a = self.image_a = \
            tf.placeholder(tf.float32, [self._batch_size] + self._image_shape, name='image_a')
        image_b = self.image_b = \
            tf.placeholder(tf.float32, [self._batch_size] + self._image_shape, name='image_b')
        z = self.z = \
            tf.placeholder(tf.float32, [self._batch_size, self._latent_dim], name='z')

        # Data augmentation
        # One seed is drawn at graph-construction time and passed to every
        # random op below for both image_a and image_b.
        # NOTE(review): presumably this is meant to keep the A/B pair
        # spatially aligned - confirm.
        seed = random.randint(0, 2**31 - 1)

        def augment_image(image):
            """Resize up to `_augment_size`, random-crop back, then random-flip each image."""
            image = tf.image.resize_images(
                image, [self._augment_size, self._augment_size])
            image = tf.random_crop(image,
                                   [self._batch_size] + self._image_shape,
                                   seed=seed)
            image = tf.map_fn(
                lambda x: tf.image.random_flip_left_right(x, seed), image)
            return image

        # Augment only when `is_train` is fed as True; otherwise pass the
        # placeholders through unchanged.
        image_a = tf.cond(self.is_train, lambda: augment_image(image_a),
                          lambda: image_a)
        image_b = tf.cond(self.is_train, lambda: augment_image(image_b),
                          lambda: image_b)

        # Generator
        G = Generator('G',
                      is_train=self.is_train,
                      norm=self._norm,
                      image_size=self._image_size)

        # Discriminator
        D = Discriminator('D',
                          is_train=self.is_train,
                          norm=self._norm,
                          activation='leaky',
                          image_size=self._image_size)

        # Encoder
        E = Encoder('E',
                    is_train=self.is_train,
                    norm=self._norm,
                    activation='relu',
                    image_size=self._image_size,
                    latent_dim=self._latent_dim,
                    use_resnet=self._use_resnet)

        # conditional VAE-GAN: B -> z -> B'
        z_encoded, z_encoded_mu, z_encoded_log_sigma = E(image_b)
        image_ab_encoded = G(image_a, z_encoded)

        # conditional Latent Regressor-GAN: z -> B' -> z'
        image_ab = self.image_ab = G(image_a, z)
        z_recon, z_recon_mu, z_recon_log_sigma = E(image_ab)

        # Discriminate real/fake images
        D_real = D(image_b)
        D_fake = D(image_ab)
        D_fake_encoded = D(image_ab_encoded)

        # Squared-error adversarial terms: D's output on real images is
        # compared against 0.9, its output on generated images against 0.
        loss_vae_gan = (tf.reduce_mean(tf.squared_difference(D_real, 0.9)) +
                        tf.reduce_mean(tf.square(D_fake_encoded)))

        # L1 distance between the real B image and its reconstruction from
        # the encoded latent.
        loss_image_cycle = tf.reduce_mean(tf.abs(image_b - image_ab_encoded))

        loss_gan = (tf.reduce_mean(tf.squared_difference(D_real, 0.9)) +
                    tf.reduce_mean(tf.square(D_fake)))

        # L1 distance between the sampled latent and the latent re-encoded
        # from the generated image.
        loss_latent_cycle = tf.reduce_mean(tf.abs(z - z_recon))

        # Regularizer on the encoder's (mu, log_sigma) output; the expression
        # has the form of a KL divergence to a standard normal, averaged
        # with reduce_mean.
        loss_kl = -0.5 * tf.reduce_mean(1 + 2 * z_encoded_log_sigma -
                                        z_encoded_mu**2 -
                                        tf.exp(2 * z_encoded_log_sigma))

        # Single combined objective. D minimizes `loss`, while G and E
        # minimize `-loss` (see the optimizers below); that is why the
        # reconstruction, latent and KL terms are subtracted here.
        loss = self._coeff_vae * loss_vae_gan - self._coeff_reconstruct * loss_image_cycle + \
            self._coeff_gan * loss_gan - self._coeff_latent * loss_latent_cycle - \
            self._coeff_kl * loss_kl

        # Optimizer
        # Ops registered in UPDATE_OPS are forced to run before each
        # training step. Only the D optimizer increments global_step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.optimizer_D = tf.train.AdamOptimizer(learning_rate=self.lr, beta1=0.5) \
                                .minimize(loss, var_list=D.var_list, global_step=self.global_step)
            self.optimizer_G = tf.train.AdamOptimizer(learning_rate=self.lr, beta1=0.5) \
                                .minimize(-loss, var_list=G.var_list)
            self.optimizer_E = tf.train.AdamOptimizer(learning_rate=self.lr, beta1=0.5) \
                                .minimize(-loss, var_list=E.var_list)

        # Summaries
        # Expose the individual loss tensors as attributes.
        self.loss_vae_gan = loss_vae_gan
        self.loss_image_cycle = loss_image_cycle
        self.loss_latent_cycle = loss_latent_cycle
        self.loss_gan = loss_gan
        self.loss_kl = loss_kl
        self.loss = loss

        tf.summary.scalar('loss/vae_gan', loss_vae_gan)
        tf.summary.scalar('loss/image_cycle', loss_image_cycle)
        tf.summary.scalar('loss/latent_cycle', loss_latent_cycle)
        tf.summary.scalar('loss/gan', loss_gan)
        tf.summary.scalar('loss/kl', loss_kl)
        tf.summary.scalar('loss/total', loss)
        tf.summary.scalar('model/D_real', tf.reduce_mean(D_real))
        tf.summary.scalar('model/D_fake', tf.reduce_mean(D_fake))
        tf.summary.scalar('model/D_fake_encoded',
                          tf.reduce_mean(D_fake_encoded))
        tf.summary.scalar('model/lr', self.lr)
        # Only the first image of each batch is logged.
        tf.summary.image('image/A', image_a[0:1])
        tf.summary.image('image/B', image_b[0:1])
        tf.summary.image('image/A-B', image_ab[0:1])
        tf.summary.image('image/A-B_encoded', image_ab_encoded[0:1])
        self.summary_op = tf.summary.merge_all()
Ejemplo n.º 27
0
            out_channels //= channel_shrinkage_factor

        i = num_conv_volumes
        layers.append(('deconv%d' % i, nn.ConvTranspose2d(in_channels, 3, *conv_spec),))
        layers.append(('tanh', nn.Tanh()))

        self._net = nn.Sequential(OrderedDict(layers))

    def forward(self, x):
        """Check the channel count of ``x`` and run it through the wrapped network.

        Asserts that dimension 1 of ``x`` equals ``self._num_in_channels``
        and returns ``self._net(x)``.
        """
        channel_count = x.size()[1]
        assert channel_count == self._num_in_channels
        output = self._net(x)
        return output


# Smoke test: encode a random image and check that the decoder output has
# the same (channels, height, width) as the input.
if __name__ == '__main__':
    import torch
    from encoder import Encoder

    input_image_size = 64
    expected_image_shape = (3, input_image_size, input_image_size)

    # The decoder is sized from the encoder's reported output channel count.
    enc = Encoder(input_image_size)
    dec = Decoder(enc.num_out_channels)

    # Single random image: batch dimension of 1 in front of the image shape.
    x = torch.autograd.Variable(torch.rand(1, *expected_image_shape))
    z = enc.forward(x)
    x_ = dec.forward(z)

    print(dec)
    print(x_.size())
    # Compare only the trailing three dimensions, ignoring the batch dimension.
    assert x_.size()[-3:] == expected_image_shape
# prepare the training data for the NetworkManager
# Pixel values are scaled to [0, 1]; labels are one-hot encoded over 10 classes.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

dataset = [x_train, y_train, x_test,
           y_test]  # pack the dataset for the NetworkManager

with policy_sess.as_default():
    # create the Encoder and build the internal policy network
    # NOTE(review): `K_` is passed as the `K` keyword while `K` itself is used
    # below as a module (presumably the Keras backend) - confirm against the
    # file's imports.
    controller = Encoder(policy_sess,
                         state_space,
                         B=B,
                         K=K_,
                         reg_param=REGULARIZATION,
                         controller_cells=CONTROLLER_CELLS,
                         restore_controller=RESTORE_CONTROLLER)

# create the Network Manager
manager = NetworkManager(dataset, epochs=MAX_EPOCHS, batchsize=BATCHSIZE)
print()

# train for number of trials
for trial in range(B):
    with policy_sess.as_default():
        K.set_session(policy_sess)

        # The first trial starts with k unset.
        if trial == 0:
            k = None
Ejemplo n.º 29
0
# Symbolic on/off states.
ON = True
OFF = False

# Entry
# Field indices into an entry.
# NOTE(review): presumably the `entry` returned by db.checkaddr() - confirm.
ADDRESS = 0
USERNAME = 1

if __name__ == '__main__':
	# Components
	# Every statement in this body is indented with a single tab. The original
	# mixed tab-indented and 4-space-indented statements at the same nesting
	# level, which Python rejects as inconsistent indentation.
	ld = Led(red_pin=4, green_pin=17)
	ld.setup()

	motor = Motor(step_pin=23, dir_pin=24, power_pin=25)
	motor.setup()

	encoder = Encoder(clk_pin=22, dir_pin=27)
	encoder.setup()

	# NOTE(review): `host` is not defined in the visible fragment; it must
	# come from elsewhere in the file.
	db = Database(host, "philosoraptor", "explosion", "doorman")
	db.setup()

	rfid = Rfid()
	rfid.setup()

	# Main Loop
	while True:
		ld.setstate(ld.BOTH)
		addr = rfid.getaddr()
		# Only query the database when the reader returned an address.
		if addr:
			#print "Address: %s" % addr
			found, entry = db.checkaddr(addr)
Ejemplo n.º 30
0
def train_encoder(train_set, hidden_num, opt, learning_r, epoch=500, batch_size=32, pre_trained_path='', test_set=None,
                  name="Validation", tensorboard=False):
    """Train an ``Encoder`` followed by a ``Predictor`` with cross-entropy loss.

    Args:
        train_set: Training data, passed to ``make_data_loader`` and to
            ``test_encoder`` for the per-epoch training evaluation.
        hidden_num: Forwarded to ``Predictor(hidden_num=...)``.
        opt: ``'ADAM'`` selects ``optim.Adam``; any other value selects
            ``optim.SGD``.
        learning_r: Learning rate for the optimizer.
        epoch: Number of epochs to run.
        batch_size: Batch size for the training data loader.
        pre_trained_path: If non-empty, the encoder is replaced by
            ``torch.load(pre_trained_path)["model"]``.
        test_set: Optional evaluation data; evaluated after every epoch.
        name: Label passed to ``test_encoder`` for the ``test_set`` evaluation.
        tensorboard: When true, write scalars to the module-level
            ``writer_train`` / ``writer_test``.

    Returns:
        ``(net, predictor, best_test_loss, best_test_accuracy, accuracy_array)``.
        "Best" accuracies are the accuracies recorded at the epoch with the
        lowest loss, not the maximum accuracy seen.
    """
    data_loader = make_data_loader(data_to_loader=train_set, batch_size=batch_size)
    best_test_loss = 9999999999999
    best_test_accuracy = 0
    best_train_accuracy = 0
    best_train_loss = 9999999999
    accuracy_array = []
    train_accuracy_array = []
    is_pretrained = False

    net = Encoder()
    predictor = Predictor(hidden_num=hidden_num)

    if pre_trained_path != '':
        net = torch.load(pre_trained_path)["model"]
        is_pretrained = True

    if use_gpu:
        net = net.cuda()
        predictor = predictor.cuda()

    criterion = nn.CrossEntropyLoss()
    params = list(net.parameters()) + list(predictor.parameters())
    if opt == 'ADAM':
        optimizer = optim.Adam(params, lr=learning_r)
    else:
        optimizer = optim.SGD(params, lr=learning_r)

    # Suffix for the tensorboard tags. The original assigned this to `name`
    # inside the loop, which silently replaced the caller's evaluation label
    # from the second epoch onward; it now lives in its own variable.
    run_tag = "is_pretrained" if is_pretrained else "scratch"

    # A separate loop variable keeps the `epoch` parameter (the epoch count)
    # from being shadowed.
    for epoch_idx in range(epoch):  # loop over the dataset multiple times
        print("Epoch: ", epoch_idx)

        for inputs, labels in data_loader:
            if use_gpu:
                inputs = inputs.cuda()
                labels = labels.cuda()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = predictor(net(inputs))

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        train_loss, accuracy = test_encoder(net, predictor, train_set, name="Training", show_log=True)
        train_accuracy_array.append(accuracy)

        if train_loss < best_train_loss:
            best_train_loss = train_loss
            best_train_accuracy = accuracy

        if test_set is not None:
            test_loss, accuracy = test_encoder(net, predictor, test_set, name=name, show_log=True)
            accuracy_array.append(accuracy)
            if test_loss < best_test_loss:
                best_test_loss = test_loss
                best_test_accuracy = accuracy

        if tensorboard:
            # NOTE(review): the test-loss tag is spelled 'losss_' in the
            # original; kept unchanged so existing logs stay comparable.
            writer_train.add_scalar('loss_'+run_tag, best_train_loss, epoch_idx)
            writer_test.add_scalar('losss_'+run_tag, best_test_loss, epoch_idx)

            writer_train.add_scalar('top-1_accuracys_'+run_tag, best_train_accuracy, epoch_idx)
            writer_test.add_scalar('top-1_accuracys_'+run_tag, best_test_accuracy, epoch_idx)

            # Third-highest accuracy so far; 0 until three test evaluations
            # have been recorded (always 0 when no test_set is given).
            if len(accuracy_array) < 3:
                writer_train.add_scalar('top-3_accuracys_'+run_tag, 0, epoch_idx)
                writer_test.add_scalar('top-3_accuracys_'+run_tag, 0, epoch_idx)
            else:
                writer_train.add_scalar('top-3_accuracys_'+run_tag, sorted(train_accuracy_array)[-3], epoch_idx)
                writer_test.add_scalar('top-3_accuracys_'+run_tag, sorted(accuracy_array)[-3], epoch_idx)

    return net, predictor, best_test_loss, best_test_accuracy, accuracy_array
Ejemplo n.º 31
0
class Robot():
	"""Simulated robot with left/right motors that moves and draws itself on the image map.

	Paul v0.0:
	posX and posY are the coordinates on the board.
	logicX and logicY (stored as ``logicalX`` / ``logicalY``) are the
	coordinates relative to the robot's start position; they are the
	coordinates deduced by the robot itself.
	"""

	# Speed limit used when clamping the displayed motor speed and as the
	# 100% reference in increaseLeftMotorSpeed / increaseRightMotorSpeed.
	MaxSpeed = 24

	def __init__(self, posX, posY, posTheta):
		"""Create a robot at board position ``(posX, posY)`` with orientation ``posTheta``."""

		# Body rectangle size used for drawing and collision checks.
		self.w = 15
		self.h = 10

		self.heading = 0
		self.posTheta = posTheta
		self.posX = posX
		self.posY = posY

		self.targetTheta = 0

		self.logicalTheta = 0
		self.logicalX = 0
		self.logicalY = 0

		# Constants of the motion formulas in move() / nextStep().
		# NOTE(review): presumably wheel radius, encoder ticks per
		# revolution and wheel distance - confirm.
		self.Rw = 9
		self.Tr = 240
		self.D  = 9

		# Per-step tick increments for the left (dT1) and right (dT2) side.
		self.dT1 = 0
		self.dT2 = 0

		self.T1 = 0
		self.T2 = 0

		# Counts move() calls; nextStep() runs on every fifth call.
		self.counter = 0

		self.leftEncoder = Encoder()
		self.rightEncoder = Encoder()

		self.leftRangeSensor = RangeSensor(self, self.h / 2 + 1, 0, -1)
		self.frontRangeSensor = RangeSensor(self, self.w / 2 + 1, 1, 0)
		self.rightRangeSensor = RangeSensor(self, self.h / 2 + 1, 0, 1)

		self.statsWidget = RobotStatsWidget(self)


	def setOrientation(self, posTheta):
		"""Store the orientation and forward it to the stats widget."""

		self.posTheta = posTheta
		self.statsWidget.setOrientation(self.posTheta)

	def setTargetDirection(self, targetTheta):
		"""Store the target direction, wrapped with modulo 2*pi when outside [0, 2*pi]."""

		self.targetTheta = targetTheta

		# Python's % returns a non-negative result for a positive modulus,
		# so the same wrap handles both too-large and negative angles.
		if self.targetTheta > 2 * math.pi or self.targetTheta < 0:
			self.targetTheta = self.targetTheta % (2 * math.pi)

		#if self.targetTheta > 2 * math.pi:
		#	self.target = 2 * math.pi
		#	print('targetTheta: %f' % self.targetTheta)

		#elif self.targetTheta < 0:
		#	self.targetTheta = 0
		#	print('targetTheta: %f' % self.targetTheta)

		self.statsWidget.setTargetDirection(self.targetTheta)

	def move(self):
		"""Advance the robot by one step using the current tick increments.

		Adds ``dT1`` / ``dT2`` to the encoders, computes the pose change and
		applies it only if the new position does not collide. Every fifth
		call also runs ``nextStep()``.
		"""

		self.leftEncoder.addTicks(self.dT1)
		self.rightEncoder.addTicks(self.dT2)

		#print('dT1: ' + str(self.dT1) + ', dT2: ' + str(self.dT2))

		b = 9

		# NOTE(review): dHeading / dRealX / dRealY computed in this if-chain
		# are only referenced by the commented-out debug print below; the
		# pose update uses the *2 values marked "This is wrong". Kept as-is
		# because it looks like the intended replacement model.
		if (self.dT2 - self.dT1) < 0.00001:

			dHeading = 0
			dRealX = self.dT1 * math.cos(self.heading)
			dRealY = self.dT1 * math.sin(self.heading)

		elif (self.dT2 - self.dT1) > 24:

			dHeading = (self.dT2 - self.dT1) / b
			dRealX = 0
			dRealY = 0

		else:

			R = b / 2 * (self.dT2 + self.dT1) / (self.dT2 - self.dT1)

			dHeading = (self.dT2 - self.dT1) / b

			dRealX = R * (math.sin(dHeading + self.heading) - math.sin(self.heading))
			dRealY = R * (math.cos(self.heading) - math.cos(dHeading + self.heading))

		#TODO: This is wrong
		dHeading2 = 2 * math.pi * (self.Rw / self.D) * \
				(self.dT1 - self.dT2) / self.Tr


		#TODO: This is wrong
		dRealX2 = self.Rw * math.cos(self.heading) * \
				(self.dT1 + self.dT2) * math.pi / self.Tr
		dRealY2 = self.Rw * math.sin(self.heading) * \
				(self.dT1 + self.dT2) * math.pi / self.Tr

		#print('dHeading: ' + str(dHeading2) + ', dRealX2: ' + str(dRealX2) + ', dRealY2: ' + str(dRealY2))
		#print('dRealTheta: ' + str(dHeading) + ', dRealX: ' + str(dRealX) + ', dRealY: ' + str(dRealY))

		# Commit the new pose only when the destination is free.
		if self.checkCollision(self.posX + dRealX2, self.posY + dRealY2) == False:
			self.heading = self.heading + dHeading2
			self.posX = self.posX + dRealX2
			self.posY = self.posY + dRealY2

			self.setOrientation(self.posTheta)
			self.setTargetDirection(self.targetTheta - dHeading2)

		#print('heading: ' + str(self.heading) + ', posX: ' + str(self.posX) + ', posY: ' + str(self.posY))

		self.counter += 1
		if self.counter == 5:
			self.nextStep()
			self.counter = 0


	def draw(self, painter):
		"""Draw the robot body, rotated by ``posTheta`` at its position, and its three range sensors."""

		painter.setPen(QtGui.QColor(0xffffff))
		painter.setBrush(QtGui.QColor(0x00CC66))

		#rect = QtCore.QRect(0, 0, self.w, self.h)
		#original = QtGui.QPolygon(rect, True)
		#painter.drawPolygon(original)

		coords = []
		coords.append(QtCore.QPoint(0, 0))
		coords.append(QtCore.QPoint(self.w, 0))
		coords.append(QtCore.QPoint(self.w, self.h))
		coords.append(QtCore.QPoint(0, self.h))

		# Center the rectangle on the origin, rotate it, then move it to the
		# robot position.
		original = QtGui.QPolygon(coords)
		original.translate(-self.w/2, -self.h/2)
		transform = QtGui.QTransform().rotateRadians(self.posTheta)
		#painter.fillRect(self.posX, self.posX, self.w, self.h, QtGui.QColor(0x0ff000))
		rotated = transform.map(original)

		#original.translate(self.posX, self.posY)
		rotated.translate(self.posX, self.posY)

		#QtCore.qDebug(str(rotated.point(0)))
		#QtCore.qDebug(str(rotated.point(1)))
		#QtCore.qDebug(str(rotated.point(2)))
		#QtCore.qDebug(str(rotated.point(3)))

		#painter.drawPolygon(original)
		painter.setBrush(QtGui.QColor(0x0066CC))
		painter.drawPolygon(rotated)

		self.leftRangeSensor.draw(painter, QtGui.QColor(0x00FF00))
		self.frontRangeSensor.draw(painter, QtGui.QColor(0xFF0000))
		self.rightRangeSensor.draw(painter, QtGui.QColor(0x0000FF))


	def nextStep(self):
		"""Refresh the sensor readouts and choose new motor speeds from ``targetTheta``."""

		leftDist = self.leftRangeSensor.getDistance()
		frontDist = self.frontRangeSensor.getDistance()
		rightDist = self.rightRangeSensor.getDistance()

		self.statsWidget.setLeftRangeSensorDistance(leftDist)
		self.statsWidget.setFrontRangeSensorDistance(frontDist)
		self.statsWidget.setRightRangeSensorDistance(rightDist)


		# NOTE(review): dHeading2 is computed but not used below.
		dHeading2 = 2 * math.pi * (self.Rw / self.D) * \
				(self.dT1 - self.dT2) / self.Tr
		# dT = dT1 - dT2
		dT = self.targetTheta * self.D * self.Tr / (2 * math.pi * self.Rw)

		# Small remaining turn: drive both motors at 24. Otherwise drive the
		# two motors in opposite directions.
		if dT < 5:
			self.setLeftMotorSpeed(24)
			self.setRightMotorSpeed(24)
		else:
			self.setLeftMotorSpeed(dT)
			self.setRightMotorSpeed(-dT)

		self.T1 = 0
		self.T2 = 0


	def increaseLeftMotorSpeed(self, percent):
		"""Raise the left motor speed by ``percent`` percent of ``Robot.MaxSpeed``."""
		speed = self.dT1 + (percent / 100.0) * Robot.MaxSpeed
		#print('speed: %f, dT1: %f, percent: %f, MaxSpeed: %f' % (speed, self.dT1, percent, Robot.MaxSpeed))

		self.setLeftMotorSpeed(speed)


	def increaseRightMotorSpeed(self, percent):
		"""Raise the right motor speed by ``percent`` percent of ``Robot.MaxSpeed``."""
		speed = self.dT2 + (percent / 100.0) * Robot.MaxSpeed
		#print('speed: %f, dT2: %f, percent: %f, MaxSpeed: %f' % (speed, self.dT2, percent, Robot.MaxSpeed))

		self.setRightMotorSpeed(speed)


	def setLeftMotorSpeed(self, speed):
		"""Set the left tick increment and show the clamped speed in the stats widget."""
		# NOTE(review): dT1 stores the unclamped value; only the value sent
		# to the widget is limited to +/- MaxSpeed. Confirm this is intended.
		self.dT1 = speed

		if speed < - Robot.MaxSpeed:
			speed = - Robot.MaxSpeed
		elif speed > Robot.MaxSpeed:
			speed = Robot.MaxSpeed

		self.statsWidget.setLeftMotorSpeed(speed)
		#print('dT1: %f dT2: %f' % (self.dT1, self.dT2))


	def setRightMotorSpeed(self, speed):
		"""Set the right tick increment and show the clamped speed in the stats widget."""
		# NOTE(review): dT2 stores the unclamped value; only the value sent
		# to the widget is limited to +/- MaxSpeed. Confirm this is intended.
		self.dT2 = speed

		if speed < - Robot.MaxSpeed:
			speed = - Robot.MaxSpeed
		elif speed > Robot.MaxSpeed:
			speed = Robot.MaxSpeed

		self.statsWidget.setRightMotorSpeed(speed)
		#print('dT1: %f dT2: %f' % (self.dT1, self.dT2))


	def checkCollision(self, newPosX, newPosY):
		"""A rudimentary collision check.

		Builds a square around ``(newPosX, newPosY)`` with half-side
		``max(w, h) / 2 + 1`` and returns True when the square leaves the
		map image or when any of its four corner pixels is not
		``0xFFFFFFFF``. Returns False otherwise.
		"""
		r = max(self.w, self.h) / 2

		x1 = newPosX - r - 1
		y1 = newPosY - r - 1
		x2 = newPosX + r + 1
		# Fixed: the bottom edge was derived from newPosX, so the lower
		# corners were tested at the wrong height.
		y2 = newPosY + r + 1

		if x1 < 0 or y1 < 0 or\
			x2 >= ImageMap.image.width() or\
			y2 >= ImageMap.image.height():
			return True

		if ImageMap.image.pixel(x1, y1) != 0xFFFFFFFF:
			return True
		if ImageMap.image.pixel(x1, y2) != 0xFFFFFFFF:
			return True
		if ImageMap.image.pixel(x2, y1) != 0xFFFFFFFF:
			return True
		if ImageMap.image.pixel(x2, y2) != 0xFFFFFFFF:
			return True

		return False


	def getStatsWidget(self):
		"""Return the stats widget created for this robot."""
		return self.statsWidget
class CommentGenerationModel(nn.Module):
    """Simple Seq2Seq w/ attention + copy model for pure comment generation (i.e. generating a comment given a method)."""
    def __init__(self, model_path):
        """Record where the model is saved; networks are built later by ``initialize``.

        Args:
            model_path: Path passed to ``torch.save`` whenever validation
                loss improves during ``run_train``.
        """
        super(CommentGenerationModel, self).__init__()
        self.model_path = model_path
        # 'gpu' selects CUDA in get_device(); anything else selects CPU.
        self.torch_device_name = 'cpu'

    def initialize(self, train_data, embedding_store=None):
        """Initializes model parameters from pre-defined hyperparameters and other hyperparameters
           that are computed based on statistics over the training data.

        Args:
            train_data: Examples exposing ``new_comment_tokens`` and
                ``new_code_tokens``.
            embedding_store: Optional pre-built store; when ``None`` a new
                ``EmbeddingStore`` is created from the token counts.
        """
        nl_lengths = []
        code_lengths = []
        nl_token_counter = Counter()
        code_token_counter = Counter()

        for ex in train_data:
            trg_sequence = [START] + ex.new_comment_tokens + [END]
            nl_token_counter.update(trg_sequence)
            nl_lengths.append(len(trg_sequence))

            code_sequence = ex.new_code_tokens
            code_token_counter.update(code_sequence)
            code_lengths.append(len(code_sequence))

        # Sequence length caps are taken at the LENGTH_CUTOFF_PCT percentile.
        self.max_nl_length = int(
            np.percentile(np.asarray(sorted(nl_lengths)), LENGTH_CUTOFF_PCT))
        self.max_code_length = int(
            np.percentile(np.asarray(sorted(code_lengths)), LENGTH_CUTOFF_PCT))

        nl_counts = np.asarray(sorted(nl_token_counter.values()))
        nl_threshold = int(np.percentile(nl_counts, VOCAB_CUTOFF_PCT)) + 1
        code_counts = np.asarray(sorted(code_token_counter.values()))
        # Fixed: the code threshold was computed from nl_counts, leaving
        # code_counts unused.
        code_threshold = int(np.percentile(code_counts, VOCAB_CUTOFF_PCT)) + 1

        if embedding_store is None:
            self.embedding_store = EmbeddingStore(
                nl_threshold, NL_EMBEDDING_SIZE, nl_token_counter,
                code_threshold, CODE_EMBEDDING_SIZE, code_token_counter,
                DROPOUT_RATE, 0, 0, 0, False)
        else:
            self.embedding_store = embedding_store

        self.code_encoder = Encoder(CODE_EMBEDDING_SIZE, HIDDEN_SIZE,
                                    NUM_LAYERS, DROPOUT_RATE)
        self.decoder = GenerationDecoder(NL_EMBEDDING_SIZE,
                                         DECODER_HIDDEN_SIZE, 2 * HIDDEN_SIZE,
                                         self.embedding_store,
                                         NL_EMBEDDING_SIZE, DROPOUT_RATE)

        self.optimizer = torch.optim.Adam(self.parameters(), lr=LR)

    def get_batches(self, dataset, shuffle=False):
        """Divides the dataset into batches based on pre-defined BATCH_SIZE hyperparameter.
           Each batch is tensorized so that it can be directly passed into the network.

        Args:
            dataset: List of examples. NOTE: shuffled in place when
                ``shuffle`` is true.
            shuffle: Whether to shuffle ``dataset`` before batching.

        Returns:
            List of ``GenerationBatchData``.
        """
        batches = []
        if shuffle:
            random.shuffle(dataset)

        curr_idx = 0
        while curr_idx < len(dataset):
            start_idx = curr_idx
            end_idx = min(start_idx + BATCH_SIZE, len(dataset))

            code_token_ids = []
            code_lengths = []
            trg_token_ids = []
            trg_extended_token_ids = []
            trg_lengths = []
            invalid_copy_positions = []
            inp_str_reps = []
            inp_ids = []

            for i in range(start_idx, end_idx):
                code_sequence = dataset[i].new_code_tokens
                code_sequence_ids = self.embedding_store.get_padded_code_ids(
                    code_sequence, self.max_code_length)
                code_length = min(len(code_sequence), self.max_code_length)
                code_token_ids.append(code_sequence_ids)
                code_lengths.append(code_length)

                ex_inp_str_reps = []
                ex_inp_ids = []

                # Code tokens unknown to the NL vocabulary receive
                # per-example ids starting right after the vocabulary, at
                # most max_code_length of them; repeated tokens reuse the id
                # of their first occurrence.
                extra_counter = len(self.embedding_store.nl_vocabulary)
                max_limit = len(
                    self.embedding_store.nl_vocabulary) + self.max_code_length

                for c in code_sequence[:code_length]:
                    nl_id = self.embedding_store.get_nl_id(c)
                    if self.embedding_store.is_nl_unk(
                            nl_id) and extra_counter < max_limit:
                        if c in ex_inp_str_reps:
                            nl_id = ex_inp_ids[ex_inp_str_reps.index(c)]
                        else:
                            nl_id = extra_counter
                            extra_counter += 1

                    ex_inp_str_reps.append(c)
                    ex_inp_ids.append(nl_id)

                trg_sequence = [START] + dataset[i].new_comment_tokens + [END]
                trg_sequence_ids = self.embedding_store.get_padded_nl_ids(
                    trg_sequence, self.max_nl_length)
                trg_extended_sequence_ids = self.embedding_store.get_extended_padded_nl_ids(
                    trg_sequence, self.max_nl_length, ex_inp_ids,
                    ex_inp_str_reps)

                trg_token_ids.append(trg_sequence_ids)
                trg_extended_token_ids.append(trg_extended_sequence_ids)
                trg_lengths.append(min(len(trg_sequence), self.max_nl_length))
                inp_str_reps.append(ex_inp_str_reps)
                inp_ids.append(ex_inp_ids)

                invalid_copy_positions.append(
                    get_invalid_copy_locations(ex_inp_str_reps,
                                               self.max_code_length,
                                               trg_sequence,
                                               self.max_nl_length))

            device = self.get_device()
            batches.append(
                GenerationBatchData(
                    torch.tensor(code_token_ids,
                                 dtype=torch.int64,
                                 device=device),
                    torch.tensor(code_lengths,
                                 dtype=torch.int64,
                                 device=device),
                    torch.tensor(trg_token_ids,
                                 dtype=torch.int64,
                                 device=device),
                    torch.tensor(trg_extended_token_ids,
                                 dtype=torch.int64,
                                 device=device),
                    torch.tensor(trg_lengths,
                                 dtype=torch.int64,
                                 device=device),
                    torch.tensor(invalid_copy_positions,
                                 dtype=torch.uint8,
                                 device=device), inp_str_reps,
                    inp_ids))
            curr_idx = end_idx
        return batches

    def get_encoder_output(self, batch_data):
        """Gets hidden states, final state, and a length-mask from the encoder.

        The mask is True at positions at or beyond each example's code length.
        """
        code_embedded_tokens = self.embedding_store.get_code_embeddings(
            batch_data.code_ids)
        code_hidden_states, code_final_state = self.code_encoder.forward(
            code_embedded_tokens, batch_data.code_lengths, self.get_device())
        mask = (torch.arange(code_hidden_states.shape[1],
                             device=self.get_device()).view(1, -1) >=
                batch_data.code_lengths.view(-1, 1)).unsqueeze(1)
        return code_hidden_states, code_final_state, mask

    def _gold_token_logprobs(self, batch_data):
        """Return per-step log-probabilities of the gold tokens under teacher forcing.

        For each step the generation log-prob of the gold token and the
        log-sum-exp over the valid copy positions are combined with
        log-sum-exp. Steps at or beyond ``trg_nl_lengths - 1`` are set to 0
        so they do not contribute to sums.
        """
        encoder_hidden_states, initial_state, inp_length_mask = self.get_encoder_output(
            batch_data)
        # Decoder inputs are the gold tokens without the last one; the
        # targets are the gold tokens without the first one.
        decoder_input_embeddings = self.embedding_store.get_nl_embeddings(
            batch_data.trg_nl_ids)[:, :-1]
        _, _, generation_logprobs, copy_logprobs = self.decoder.forward(
            initial_state, decoder_input_embeddings, encoder_hidden_states,
            inp_length_mask)

        gold_generation_ids = batch_data.trg_nl_ids[:, 1:].unsqueeze(-1)
        gold_generation_logprobs = torch.gather(
            input=generation_logprobs, dim=-1,
            index=gold_generation_ids).squeeze(-1)

        # Copy positions flagged as invalid are excluded with -inf.
        num_inp_positions = encoder_hidden_states.shape[1]
        copy_logprobs = copy_logprobs.masked_fill(
            batch_data.invalid_copy_positions[:, 1:, :num_inp_positions],
            float('-inf'))
        gold_copy_logprobs = copy_logprobs.logsumexp(dim=-1)

        gold_logprobs = torch.logsumexp(
            torch.cat([
                gold_generation_logprobs.unsqueeze(-1),
                gold_copy_logprobs.unsqueeze(-1)
            ], dim=-1),
            dim=-1)

        num_steps = batch_data.trg_nl_ids[:, 1:].shape[-1]
        step_positions = torch.arange(
            num_steps, device=self.get_device()).unsqueeze(0)
        padding_mask = step_positions >= batch_data.trg_nl_lengths.unsqueeze(-1) - 1
        return gold_logprobs.masked_fill(padding_mask, 0)

    def forward(self, batch_data):
        """Computes the loss against the gold sequences corresponding to the examples in the batch. NOTE: teacher-forcing."""
        gold_logprobs = self._gold_token_logprobs(batch_data)

        likelihood_by_example = gold_logprobs.sum(dim=-1)

        # Normalizing by length. Seems to help
        likelihood_by_example = likelihood_by_example / (
            batch_data.trg_nl_lengths - 1).float()

        return -(likelihood_by_example).mean()

    def compute_generation_likelihood(self, batch_data):
        """This is not used by the generation model but rather the comment update model for re-ranking. It computes P(Comment|Method).

        Returns the exponential of the length-normalized gold log-likelihood
        per example, computed without gradient tracking.
        """
        with torch.no_grad():
            gold_logprobs = self._gold_token_logprobs(batch_data)
            return torch.exp(
                gold_logprobs.sum(dim=-1) /
                (batch_data.trg_nl_lengths - 1).float())

    def greedy_decode(self, batch_data):
        """Predicts a comment for every method in the batch in a greedy manner.

        Returns:
            One token list per example, obtained by mapping the predicted
            ids back through the embedding store and the example's input
            ids / string representations.
        """
        encoder_hidden_states, initial_state, inp_length_mask = self.get_encoder_output(
            batch_data)
        predictions, scores = self.decoder.greedy_decode(
            initial_state, encoder_hidden_states, inp_length_mask,
            self.max_nl_length, batch_data, self.get_device())

        batch_size = initial_state.shape[0]

        decoded_tokens = []
        for i in range(batch_size):
            token_ids = predictions[i]
            tokens = self.embedding_store.get_nl_tokens(
                token_ids, batch_data.input_ids[i],
                batch_data.input_str_reps[i])
            decoded_tokens.append(tokens)
        return decoded_tokens

    def get_device(self):
        """Returns the proper device."""
        if self.torch_device_name == 'gpu':
            return torch.device('cuda')
        else:
            return torch.device('cpu')

    def run_gradient_step(self, batch_data):
        """Performs gradient step and returns the batch loss as a float."""
        self.optimizer.zero_grad()
        loss = self.forward(batch_data)
        loss.backward()
        self.optimizer.step()
        return float(loss.cpu())

    def run_train(self, train_data, valid_data):
        """Runs training over the entire training set across several epochs. Following each epoch,
           loss on the validation data is computed. If the validation loss has improved, save the model.
           Early-stopping is employed to stop training if validation hasn't improved for a certain number
           of epochs."""
        valid_batches = self.get_batches(valid_data)
        train_batches = self.get_batches(train_data, shuffle=True)

        best_loss = float('inf')
        patience_tally = 0

        for epoch in range(MAX_EPOCHS):
            if patience_tally > PATIENCE:
                print('Terminating')
                break

            self.train()
            # Batch contents are fixed after get_batches(); only the batch
            # order changes between epochs.
            random.shuffle(train_batches)

            train_loss = 0
            for batch_data in train_batches:
                train_loss += self.run_gradient_step(batch_data)

            self.eval()
            validation_loss = 0
            with torch.no_grad():
                for batch_data in valid_batches:
                    validation_loss += float(self.forward(batch_data).cpu())

            validation_loss = validation_loss / len(valid_batches)

            if validation_loss <= best_loss:
                torch.save(self, self.model_path)
                saved = True
                best_loss = validation_loss
                patience_tally = 0
            else:
                saved = False
                patience_tally += 1

            print('Epoch: {}'.format(epoch))
            print('Training loss: {}'.format(train_loss / len(train_batches)))
            print('Validation loss: {}'.format(validation_loss))
            if saved:
                print('Saved')
            print('-----------------------------------')
            sys.stdout.flush()

    def run_evaluation(self, test_data):
        """Generates predicted comments for all comments in the test set and computes evaluation metrics."""
        self.eval()

        test_batches = self.get_batches(test_data)
        test_predictions = []

        # Lists of string predictions
        gold_strs = []
        pred_strs = []

        # Lists of tokenized predictions
        references = []
        pred_instances = []

        with torch.no_grad():
            for b_idx, batch_data in enumerate(test_batches):
                print('Evaluating {}'.format(b_idx))
                sys.stdout.flush()
                test_predictions.extend(self.greedy_decode(batch_data))

        # Batches were built without shuffling, so prediction i lines up
        # with test_data[i].
        for i, prediction in enumerate(test_predictions):
            gold_str = test_data[i].new_comment
            pred_str = ' '.join(prediction)

            gold_strs.append(gold_str)
            pred_strs.append(pred_str)

            references.append([test_data[i].new_comment_tokens])
            pred_instances.append(prediction)

            print('Gold comment: {}'.format(gold_str))
            print('Predicted comment: {}'.format(pred_str))
            print('----------------------------')

        predicted_accuracy = compute_accuracy(gold_strs, pred_strs)
        predicted_bleu = compute_bleu(references, pred_instances)
        predicted_meteor = compute_meteor(references, pred_instances)

        print('Predicted Accuracy: {}'.format(predicted_accuracy))
        print('Predicted BLEU: {}'.format(predicted_bleu))
        print('Predicted Meteor: {}\n'.format(predicted_meteor))
Ejemplo n.º 33
0
from decoder import Decoder
from base import BaseModel

from utils.general import Config



# Command-line options, one per JSON config file (data, vocab, training, model).
# NOTE(review): no function definition follows these decorators in this
# snippet; a decorator followed by an assignment is a syntax error, so the
# decorated `def` appears to have been lost -- confirm against the original
# script. `click` is also not imported in the visible part of the file.
@click.option('--data', default="configs/data_small.json",
        help='Path to data json config')
@click.option('--vocab', default="configs/vocab_small.json",
        help='Path to vocab json config')
@click.option('--training', default="configs/training_small.json",
        help='Path to training json config')
@click.option('--model', default="configs/model.json",
        help='Path to model json config')
# @click.option('--output', default="results/small/",
#         help='Dir for results and model weights')


# Hard-coded config paths mirroring the defaults of the options above; the
# output directory entry is disabled, like the matching option.
_config_args = [
    'configs/data_small.json',
    'configs/vocab_small.json',
    'configs/training_small.json',
    'configs/model.json',
    # 'results/small/',
]

# Build one Config from the list of JSON paths and hand it to both models.
_config = Config(_config_args)
# NOTE(review): `Encoder` and `_vocab` are not defined or imported in the
# visible snippet -- presumably provided elsewhere; verify.
encoder = Encoder(_config)
decoder = Decoder(_config, _vocab.n_tok, _vocab.id_end)
Ejemplo n.º 34
0
# Load the training images and ask which result directory this run writes to.
dataloader = read_dataset('../pic', img_size, batch_size)
version = input('result version:')

# Relative weights of the individual loss terms.
c_loss_weight = 0.3
RF_loss_weight = 0.7
generator_loss_weight = 0.7

# Record the hyper-parameters of this run, one `name=value` per line.
# NOTE(review): the combined `RF_loss_weight=generator_loss_weight=` label
# only writes RF_loss_weight; the two happen to be equal here.
path = './result_' + version + '/arg_' + version + '.txt'
arg = '\n'.join([
    'epoch=' + str(epoch),
    'batch_size=' + str(batch_size),
    'img_size=' + str(img_size),
    'c_size=' + str(c_size),
    'z_size=' + str(z_size),
    'RF_loss_weight=generator_loss_weight=' + str(RF_loss_weight),
    'c_loss_weight=' + str(c_loss_weight),
]) + '\n'

# Append (rather than overwrite) so earlier records for the same version are
# kept; the context manager closes the file even if the write fails.
with open(path, 'a+') as f:
    f.write(arg + '\n')

unloader = transforms.ToPILImage()
encoder = Encoder(c_size, z_size)


def weights_init(m):
    """Re-initialise the parameters of *m* in place, based on its class name.

    Classes whose name contains ``'Conv'`` get weights drawn via
    ``normal_(0.0, 0.02)``. Otherwise, classes whose name contains
    ``'BatchNorm'`` get weights drawn via ``normal_(1.0, 0.02)`` and their
    bias filled with 0. Any other object is left untouched.
    """
    layer_name = m.__class__.__name__

    # 'Conv' is checked first, so it wins if a name matches both patterns.
    if 'Conv' in layer_name:
        m.weight.data.normal_(0.0, 0.02)
        return

    if 'BatchNorm' in layer_name:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


# The generator's constructor argument is the sum of the c and z sizes.
g = Generator(c_size + z_size)
# Hand weights_init to g.apply -- presumably torch's Module.apply, which
# would call it on every submodule; confirm Generator is an nn.Module.
g.apply(weights_init)

# The discriminator is built with its default arguments; weights_init is not
# applied to it in the visible lines.
d = Discriminator()
Ejemplo n.º 35
0
Archivo: encode.py Proyecto: blanu/Dust
import sys

from encoder import Encoder

# Read the table from stats.out: one float per line. The context manager
# closes the file even if reading fails.
with open('stats.out') as f:
    lines = f.readlines()

# Pair every parsed value with its zero-based line index: (value, index).
# A line that is not a valid float (including a blank one) raises ValueError.
dist = [(float(line.strip()), index) for index, line in enumerate(lines)]

# Round trip: encode argv[1] into argv[2], then decode argv[2] into argv[3].
enc = Encoder(dist)
enc.encode(sys.argv[1], sys.argv[2])
enc.decode(sys.argv[2], sys.argv[3])


Ejemplo n.º 36
0
    def __init__(self, vocab_size):
        super(EncoderDecoder, self).__init__()

        self.embedding_layer = nn.Embedding(vocab_size, EMBEDDING_DIM)
        self.encoder = Encoder(vocab_size, self.embedding_layer)
        self.decoder = AttentionDecoder(vocab_size, self.embedding_layer)