Beispiel #1
0
def _write_head(src_path, dst_path, num_lines=100):
    """Copy the first ``num_lines`` lines of ``src_path`` into ``dst_path``.

    Pure-Python replacement for ``head -n``: no shell is involved, so paths
    containing spaces or shell metacharacters are handled correctly.  Files
    are processed as bytes, so no decoding or encoding takes place.

    Raises:
        IOError/OSError: if ``src_path`` cannot be read or ``dst_path``
            cannot be written.
    """
    with open(src_path, 'rb') as src, open(dst_path, 'wb') as dst:
        for line_no, line in enumerate(src):
            if line_no >= num_lines:
                break
            dst.write(line)


def train(args):
    """Prepare the training and dev corpora and run the trainer.

    Reads these attributes from ``args``: ``data_dir``, ``src``, ``tgt``,
    ``use_morph``, ``replace_unk``, ``max_size``, ``DEBUG`` and
    ``train_dir``.  The whole namespace is additionally forwarded as keyword
    arguments to ``DataStreamRandom``, ``DataStream`` and ``Trainer``.

    The function relies on the names ``buckets``, ``vocabs``, ``model`` and
    ``tgt_vocab`` being available in the enclosing scope; they are not
    created here.
    """
    # Source side uses the segmented file when morphology is enabled.
    if args.use_morph:
        train_src_data_path = '%s/train.seg.%s' % (args.data_dir, args.src)
    else:
        train_src_data_path = '%s/train.%s' % (args.data_dir, args.src)
    train_tgt_data_path = '%s/train.%s' % (args.data_dir, args.tgt)

    names = ['src', 'tgt']
    paths = [train_src_data_path, train_tgt_data_path]
    if args.replace_unk:
        # The alignment file is only loaded when unknown-word replacement
        # is requested.
        names.append('almt')
        paths.append('%s/train.align' % (args.data_dir))

    train_corpus = prepare_corpus(names,
                                  paths,
                                  buckets,
                                  max_size=args.max_size,
                                  vocabs=vocabs,
                                  replace_unk=args.replace_unk,
                                  use_morph=args.use_morph)
    dictionary = train_corpus.dictionary

    if args.use_morph:
        dev_src_data_path = '%s/dev.seg.%s' % (args.data_dir, args.src)
    else:
        dev_src_data_path = '%s/dev.%s' % (args.data_dir, args.src)
    dev_tgt_data_path = '%s/dev.%s' % (args.data_dir, args.tgt)

    if args.DEBUG:  # use smaller dev set for debugging
        # Each ".debug" file is checked on its own, so a missing target
        # subset is regenerated even when the source subset already exists.
        for full_path in (dev_src_data_path, dev_tgt_data_path):
            if not os.path.isfile(full_path + '.debug'):
                _write_head(full_path, full_path + '.debug', 100)
        dev_src_data_path += '.debug'
        dev_tgt_data_path += '.debug'

    dev_corpus = prepare_corpus(
        ['src', 'tgt'],
        [dev_src_data_path, dev_tgt_data_path],
        buckets,
        max_size=args.max_size,
        # use all data for dev, which means do NOT discard very long sentences
        use_all=True,
        vocabs=vocabs,
        method='src',
        use_morph=args.use_morph)

    # random batching
    train_set = DataStreamRandom(train_corpus, vocabs=vocabs, **vars(args))
    # no random batching, iterated by length
    dev_set = DataStream(dev_corpus, vocabs=vocabs, **vars(args))
    trainer = Trainer(model, **vars(args))

    # Restore the tracker from an earlier run when a saved state file exists.
    datastate_path = '%s/datastate.pkl' % args.train_dir
    if os.path.isfile(datastate_path):
        trainer.tracker.load(datastate_path)

    trainer.train(train_set, dev_set, tgt_vocab, dictionary=dictionary)
Beispiel #2
0
def parse_ds(qaid, meas, conn):
    """Build a ``DataStream`` and wrap its data in a ``QueryObject``.

    ``qaid``, ``meas`` and ``conn`` are passed straight to ``DataStream``;
    ``qaid`` and ``meas`` are also passed on to the returned ``QueryObject``.
    """
    # Construct the DataStream and pull out its data payload.
    stream = DataStream(qaid, meas, conn)
    payload = stream.data

    # Total Returns frequency is 'D' (trading daily).
    return QueryObject(qaid, meas, 'D', payload, 'DataStream')
Beispiel #3
0
def test(args):
    """Translate the test set with the current model and hand off to ``bleu_test``.

    Reads these attributes from ``args``: ``data_dir``, ``src``, ``tgt``,
    ``replace_unk``, ``use_morph``, ``max_size``, ``test_path`` and
    ``bleu_path``.  The whole namespace is additionally forwarded as keyword
    arguments to ``DataStream``.

    The function relies on the names ``buckets``, ``vocabs`` and ``model``
    being available in the enclosing scope; they are not created here.
    """
    # The training corpus is loaded only to obtain its dictionary, which is
    # passed to ``translate_`` below.
    train_src_data_path = '%s/train.%s' % (args.data_dir, args.src)
    train_tgt_data_path = '%s/train.%s' % (args.data_dir, args.tgt)
    names = ['src', 'tgt']
    paths = [train_src_data_path, train_tgt_data_path]
    if args.replace_unk:
        names.append('almt')
        paths.append('%s/train.align' % (args.data_dir))
    if args.use_morph:
        names.append('morph')
        paths.append('%s/seg.%s' % (args.data_dir, args.src))
    train_corpus = prepare_corpus(names,
                                  paths,
                                  buckets,
                                  max_size=args.max_size,
                                  vocabs=vocabs,
                                  replace_unk=args.replace_unk)

    dictionary = train_corpus.dictionary

    # An explicit ``test_path`` prefix overrides the default file in data_dir.
    # Only the source side is needed: no gold target is read for testing.
    if args.test_path is not None:
        test_src_data_path = '%s.%s' % (args.test_path, args.src)
    else:
        test_src_data_path = '%s/test.%s' % (args.data_dir, args.src)

    test_corpus = prepare_corpus(['src'], [test_src_data_path],
                                 buckets,
                                 vocabs={'src': vocabs['src']},
                                 use_all=True,
                                 method='src')
    test_set = DataStream(test_corpus, vocabs=vocabs, **vars(args))

    model.eval()
    translations = dict()
    start = time.time()
    for b, batch in enumerate(test_set, 1):
        # '\r' rewrites the same console line to show running progress.
        print('\rbatch %d: %.4fs' % (b, time.time() - start), end='')
        sys.stdout.flush()

        preds, alignments, _ = model.search(batch)
        translate_(translations,
                   preds,
                   batch,
                   vocabs['tgt'],
                   src_tokens=batch.src_tokens,
                   dictionary=dictionary,
                   alignments=alignments)
    print()
    # NOTE no bleu evaluation for test
    bleu_test(translations, test_set, args.bleu_path, gold_path=None)
Beispiel #4
0
    def __init__(self, host, port, config_path, image_x=400, image_y=300):
        """Store the connection settings and open the world socket.

        The stream and control ports are derived from ``port`` (``port + 1``
        and ``port + 2``).  Only the world socket is connected here; the
        stream and control sockets are left as the placeholder ``0``.
        A ``DataStream`` is created from ``image_x`` and ``image_y``.
        """
        self._host, self._port = host, port
        logging.debug('selected host %s' % host)
        logging.debug('selected port %s' % port)

        self._config_path = config_path
        logging.debug('set config path as port %s' % config_path)
        logging.debug('selected port %s' % port)

        # Derived ports, relative to the world port.
        self._port_stream = port + 1
        self._port_control = port + 2

        self._socket_world = socket_util.pers_connect(host, port)
        logging.debug("Connected to Unreal Server World Socket")

        # Placeholders until the stream/control sockets are connected.
        self._socket_stream = 0
        self._socket_control = 0

        self._image_x, self._image_y = image_x, image_y
        self._data_stream = DataStream(image_x, image_y)
        logging.debug("Started Unreal Client")
Beispiel #5
0
    def restart(self):
        """Tear down all connections and rebuild the session.

        Closes the current sockets, flags the current data stream as not
        running, then retries the world connection every 10 seconds until
        it succeeds.  Afterwards a fresh ``DataStream`` is created, the agent
        is started, a new episode is requested and the episode is re-issued
        with ``self._latest_start`` / ``self._latest_end``.

        Returns:
            The ``(scene, positions)`` pair from
            ``receiveSceneConfiguration()``.
        """
        logging.debug("Trying to close clients")
        self.close_conections()
        self._data_stream._running = False

        while True:
            try:
                logging.debug("Trying to connect to the world thread")
                self._socket_world = socket_util.connect(
                    self._host, self._port)
                break
            except Exception:
                # Deliberately not a bare ``except``: KeyboardInterrupt and
                # SystemExit must be able to break out of this endless loop.
                logging.debug("Couldn't connected ... retry in 10 seconds...")
                time.sleep(10)

        self._data_stream = DataStream(self._image_x, self._image_y)
        self.startAgent()
        self.requestNewEpisode()
        scene, positions = self.receiveSceneConfiguration()
        # NOTE(review): _latest_start/_latest_end are not assigned in this
        # method; presumably newEpisode() stores them - confirm they exist
        # before the first restart.
        self.newEpisode(self._latest_start, self._latest_end)
        logging.debug("restarted the world connection")
        return scene, positions
Beispiel #6
0
    def __init__(self, reader, head, bits):
        """Read the stream directory described by ``head`` into ``self.streams``.

        Args:
            reader: Page reader; provides ``pages_from_size``, ``seek`` and
                the underlying ``reader`` object handed to ``bits.append``.
            head: Header exposing ``directory_size``, ``directory_root`` and
                ``page_size``.
            bits: Scratch buffer used to accumulate and decode the
                directory data.

        After construction ``self.streams`` holds one ``DataStream`` per
        directory entry; entries with a size <= 0 become empty
        ``DataStream()`` objects.
        """
        self.streams = []
        pages = reader.pages_from_size(head.directory_size)

        # 0..n in page of directory pages.
        bits.min_capacity(head.directory_size)
        # Floor division keeps the counts integral on Python 3 as well
        # ("/" would yield floats there and leak into the byte counts below).
        # Presumably each page number is a 4-byte integer - TODO confirm.
        pages_per_page = head.page_size // 4
        pages_to_go = pages
        for root_page in head.directory_root:
            # The last root page may be only partially filled.
            pages_in_this_page = min(pages_per_page, pages_to_go)
            reader.seek(root_page, 0)
            bits.append(reader.reader, pages_in_this_page * 4)
            pages_to_go -= pages_in_this_page
        bits.position = 0

        stream = DataStream(head.directory_size, bits, pages)
        bits.min_capacity(head.directory_size)
        stream.read(reader, bits)

        # 0..3 in directory pages
        count = bits.read_int32()

        # 4..n
        sizes = bits.read_int32(count)

        # n..m
        for i in range(count):
            if sizes[i] <= 0:
                self.streams.append(DataStream())
            else:
                self.streams.append(DataStream(
                    sizes[i], bits, reader.pages_from_size(sizes[i])
                ))
Beispiel #7
0
def test_load_file():
    """Load every file in ``filenames`` and check its contents and keys.

    ``filenames`` is expected to be defined outside this function
    (presumably the non-hidden files of a ``data_sets`` directory - verify
    against the module setup).
    """
    logging.debug('---------------Begin test_load_file()')

    expected_rows = [[4, 8, 0], [5, 9, 1], [6, 10, 2], [7, 11, 3]]
    named_key_tags = ('header', 'structured', 'dict')

    for file in filenames:
        logging.debug(f'Loading {file}')
        ds = DataStream(file)
        logging.debug(f'Data is {ds}')

        is_empty = file.endswith('empty')
        has_named_keys = any(tag in file for tag in named_key_tags)

        # Every non-empty fixture must hold the same reference data.
        if not is_empty:
            assert np.array_equal(ds, expected_rows)

        # The expected key names depend on the kind of fixture file.
        if has_named_keys:
            assert ds._keys == ('x', 'y', 'time')
        elif is_empty:
            assert ds.array.size == 0
            assert ds._keys == ()
        else:
            assert ds._keys == ('x', 'y', 'z')
Beispiel #8
0
    def __init__(self, reader, head, bits):
        """Build ``self.streams`` from the directory referenced by ``head``.

        Args:
            reader: Page reader offering ``pages_from_size`` and ``seek``;
                its ``reader`` attribute is passed to ``bits.append``.
            head: Header object with ``directory_size``, ``directory_root``
                and ``page_size``.
            bits: Buffer that receives the directory bytes and is then used
                to decode them.

        Each directory entry yields one ``DataStream`` in ``self.streams``;
        an entry whose size is <= 0 yields an empty ``DataStream()``.
        """
        self.streams = []
        directory_pages = reader.pages_from_size(head.directory_size)

        # 0..n in page of directory pages.
        bits.min_capacity(head.directory_size)
        # Use floor division: on Python 3 a plain "/" returns a float, which
        # would turn the page and byte counts below into floats.
        # Presumably page numbers are stored as 4-byte integers - TODO confirm.
        pages_per_page = head.page_size // 4
        remaining = directory_pages
        for root_page in head.directory_root:
            # Never read more page numbers than are still outstanding.
            in_this_page = min(pages_per_page, remaining)
            reader.seek(root_page, 0)
            bits.append(reader.reader, in_this_page * 4)
            remaining -= in_this_page
        bits.position = 0

        stream = DataStream(head.directory_size, bits, directory_pages)
        bits.min_capacity(head.directory_size)
        stream.read(reader, bits)

        # 0..3 in directory pages
        count = bits.read_int32()

        # 4..n
        sizes = bits.read_int32(count)

        # n..m
        for i in range(count):
            if sizes[i] <= 0:
                self.streams.append(DataStream())
            else:
                self.streams.append(
                    DataStream(sizes[i], bits,
                               reader.pages_from_size(sizes[i])))
Beispiel #9
0
    def stop(self):
        """Close all connections and replace the data stream with a fresh one.

        The current data stream is flagged as not running before a new
        ``DataStream`` with the same image dimensions takes its place.
        """
        self.close_conections()

        self._data_stream._running = False
        self._data_stream = DataStream(self._image_x, self._image_y)
Beispiel #10
0
class CarlaUnreal(object):
    """Client for an Unreal server using world, stream and control sockets.

    Instantiating the class connects the world socket and creates the
    ``DataStream`` object that is later started on the stream socket.
    The stream and control ports are ``port + 1`` and ``port + 2``.
    """

    def __init__(self, host, port, config_path, image_x=400, image_y=300):
        """Store the settings, connect the world socket, create the stream.

        Args:
            host: Server host passed to ``socket_util``.
            port: World port; stream and control ports are derived from it.
            config_path: Path of the ini file sent by ``requestNewEpisode``.
            image_x: First argument given to ``DataStream``.
            image_y: Second argument given to ``DataStream``.
        """
        self._host = host
        self._port = port
        logging.debug('selected host %s' % host)
        logging.debug('selected port %s' % port)
        self._config_path = config_path
        logging.debug('set config path as port %s' % config_path)

        logging.debug('selected port %s' % port)
        self._port_control = self._port + 2
        self._port_stream = self._port + 1

        self._socket_world = socket_util.pers_connect(self._host, self._port)
        logging.debug("Connected to Unreal Server World Socket")
        # Placeholders until startAgent() connects the real sockets.
        self._socket_stream = 0
        self._socket_control = 0
        self._image_x = image_x
        self._image_y = image_y

        self._data_stream = DataStream(image_x, image_y)
        logging.debug("Started Unreal Client")

    def setIniFile(self, config_path):
        """Replace the ini file path used by ``requestNewEpisode``."""
        self._config_path = config_path
        logging.debug('set config path as port %s' % config_path)

    def startAgent(self):
        """Connect the image stream and control ports and start streaming."""
        logging.debug("Going to Connect Stream and start thread")
        self._socket_stream = socket_util.pers_connect(self._host,
                                                       self._port_stream)

        self._socket_control = socket_util.pers_connect(
            self._host, self._port_control)
        logging.debug("Control Socket Connected")

        self._data_stream.start(self._socket_stream)
        logging.debug("Streaming Thread  Started")

    def receiveSceneConfiguration(self):
        """Receive and parse the scene message from the world socket.

        Returns:
            ``(scene, positions)`` where ``positions`` is a list of
            ``(x, y)`` float tuples decoded from ``scene.positions``.
            On any failure the connection is restarted and the result of
            ``restart()`` is returned instead.
        """
        try:
            data = socket_util.get_message(self._socket_world)
            scene = Scene()
            scene.ParseFromString(data)
            logging.debug("Received Scene Configuration")

            # Every 8 bytes hold one position: two consecutive native
            # floats (x, y).  Floor division keeps the count an int on
            # Python 3; trailing bytes that do not form a full position
            # are ignored.
            raw_positions = scene.positions
            number_of_positions = len(raw_positions) // 8
            positions = [
                struct.unpack_from('ff', raw_positions, index * 8)
                for index in range(number_of_positions)
            ]

            return scene, positions

        except Exception:
            logging.debug("Died When receiving configuration")
            return self.restart()

    def requestNewEpisode(self, ini_file=None):
        """Send the contents of an ini file as a new-episode request.

        Args:
            ini_file: Optional path of an ini file to send; when ``None``
                the path stored on the instance is used.
        """
        requestEpisode = RequestNewEpisode()
        # You can send a new ini file to be opened; if not, open the one
        # defined on start.
        ini_path = self._config_path if ini_file is None else ini_file

        with open(ini_path, "r") as myfile:
            data = myfile.read()
        logging.debug("Set the Init File")

        requestEpisode.init_file = data
        try:
            socket_util.send_message(self._socket_world, requestEpisode)
        except Exception:
            logging.debug("Died When requesting new episode")
            self.restart()

        logging.debug("Send the new episode Request")

    def newEpisode(self, start_index, end_index):
        """Start an episode and block until the server reports it ready.

        The indices are remembered so that ``restart()`` can re-issue the
        same episode.
        """
        self._latest_start = start_index
        self._latest_end = end_index
        scene_init = EpisodeStart()
        scene_init.start_index = start_index
        scene_init.end_index = end_index
        try:
            socket_util.send_message(self._socket_world, scene_init)
        except Exception:
            logging.debug("Died When confirming new episode")
            self.restart()

        logging.debug("Send the new episode Message")
        episode_ready = EpisodeReady()
        episode_ready.ready = False
        self._data_stream.clean()
        try:
            # Keep reading world messages until one reports ready.
            while not episode_ready.ready:
                data = socket_util.get_message(self._socket_world)
                episode_ready.ParseFromString(data)
        except Exception:
            logging.debug("Died when trying to receive episode reading")
            self.restart()

        logging.debug("Episode is Ready")

    ### **** PROTOCOL 3 ****

    def getReward(self):
        """Return the latest data from the stream, restarting on failure.

        Loops until ``get_the_latest_data()`` succeeds; every failure
        triggers ``restart()`` before the next attempt.
        """
        logging.debug("Got A new Reward")

        while True:
            try:
                return self._data_stream.get_the_latest_data()
            except Exception:
                logging.debug("Got an empty reward")
                self.restart()

    def sendCommand(self, control):
        """Send a control message over the control socket.

        Command contains:
            Steering: -1 to 1
            Acc : -1 to 1
        """
        logging.debug(
            "Send Control Comand : acc -> %f , steer %f, brake %f, hand_brake %d, gear %d"
            % (control.gas, control.steer, control.brake, control.hand_brake,
               control.reverse))
        try:
            socket_util.send_message(self._socket_control, control)
        except Exception:
            logging.debug("Problems on sending the commands... restarting")
            # The message is not resent because it likely lost its relevance.
            self.restart()

    def restart(self):
        """Close everything, reconnect and re-issue the latest episode.

        The world connection is retried every 10 seconds until it succeeds.
        ``_latest_start`` / ``_latest_end`` are only assigned by
        ``newEpisode()``, so calling this before the first episode raises
        ``AttributeError``.

        Returns:
            The ``(scene, positions)`` pair from
            ``receiveSceneConfiguration()``.
        """
        logging.debug("Trying to close clients")
        self.close_conections()
        self._data_stream._running = False

        while True:
            try:
                logging.debug("Trying to connect to the world thread")
                self._socket_world = socket_util.connect(
                    self._host, self._port)
                break
            except Exception:
                # Not a bare ``except``: KeyboardInterrupt/SystemExit must be
                # able to break out of this endless retry loop.
                logging.debug("Couldn't connected ... retry in 10 seconds...")
                time.sleep(10)

        self._data_stream = DataStream(self._image_x, self._image_y)
        self.startAgent()
        self.requestNewEpisode()
        scene, positions = self.receiveSceneConfiguration()
        self.newEpisode(self._latest_start, self._latest_end)
        logging.debug("restarted the world connection")
        return scene, positions

    def stop(self):
        """Close all connections and swap in a fresh, unstarted data stream."""
        self.close_conections()

        self._data_stream._running = False
        self._data_stream = DataStream(self._image_x, self._image_y)

    def close_conections(self):
        """Shut down and close the world, stream and control sockets.

        Best effort: a failure on one socket is printed and does not stop
        the remaining sockets from being closed.
        """
        sockets_to_close = (
            ('_socket_world', "Close world"),
            ('_socket_stream', "Close Stream"),
            ('_socket_control', "Close Control"),
        )
        for attr_name, closed_message in sockets_to_close:
            try:
                sock = getattr(self, attr_name)
                sock.shutdown(socket.SHUT_RDWR)
                sock.close()
                logging.debug(closed_message)
            except Exception as ex:
                # ``ex.message`` does not exist on Python 3; printing the
                # exception itself works on both major versions.
                print(ex)
Beispiel #11
0
def test_data_type():
    """Check that ``data_type`` reports 'DictArray' for DataStream objects."""
    logging.debug('---------------Begin test_dataType()')
    # One case without arguments, one built from three empty lists.
    constructor_args = ((), ([[], [], []],))
    for ctor_args in constructor_args:
        assert data_type(DataStream(*ctor_args)) == 'DictArray'
Beispiel #12
0
def test_construct_equal():
    """Check that a DataStream built from another DataStream equals it."""
    original = DataStream()
    add_point_tester(original)
    # Build the second stream directly from the first one.
    duplicate = DataStream(original)
    check_datastreams_equal(original, duplicate)
Beispiel #13
0
def test_base_constructor():
    """Check that a DataStream can be constructed without arguments."""
    logging.debug('---------------Begin test_base_constructor()')
    # The instance is not inspected; the test passes if construction works.
    _stream = DataStream()
Beispiel #14
0
import numpy as np
import os
from datastream import DataStream

# Reference columns that every non-empty data set is compared against.
time, x, y = [0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]

# All non-hidden files found in tests/data_sets.
data_set_dir = os.path.join('tests', 'data_sets')
filenames = [
    os.path.join(data_set_dir, f) for f in os.listdir(data_set_dir)
    if not f.startswith('.')
]

known = DataStream({'x': x, 'y': y, 'time': time})
for f in filenames:
    print(f)
    r = DataStream(f)

    # Empty fixtures only need to load as zero-size streams.
    if f.endswith('empty'):
        assert r.size == 0
        continue

    # A data mismatch is reported on stdout instead of aborting the run.
    try:
        assert np.array_equal(r, known)
    except AssertionError:
        print('############', f)
        print(known)
        print(r)

    # Files of these kinds carry the key names of ``known``; all other
    # files are expected to come back with the keys x, y, z.
    has_named_keys = any(
        tag in f for tag in ('dict', 'header', 'save_struct'))
    actual_keys = set(r.keys())
    if has_named_keys:
        assert actual_keys == set(known.keys())
    else:
        assert actual_keys == {'x', 'y', 'z'}
print('COMPLETED')