Example #1
    def initialize(self):
        """    
        tables.EArray(parentnode, name, atom=None, shape=None, 
        title='', filters=None, expectedrows=None, 
        chunkshape=None, byteorder=None, _log=True)[source]    
        """

        self.m.log(1, '+++Init method of MCWaveform algorithm+++')
        self.NPMTS = 12
        self.LEN_PMT = 599999
        self.NSIPM = 1792
        self.LEN_SIPM = 600
        path = "/Users/jjgomezcadenas/Documents/Development/NEXT/data/Waveforms/"
        file = "WF_Tl_0.h5"

        self.NEVENTS = self.logman["CNTJob"].ints["NEVENTS"]
        self.h5f = tables.open_file(path + file,
                                    "w",
                                    filters=tables.Filters(complib="blosc",
                                                           complevel=9))

        self.pmtrd = self.h5f.create_earray(self.h5f.root,
                                            "pmtrd",
                                            atom=tables.IntAtom(),
                                            shape=(0, self.NPMTS,
                                                   self.LEN_PMT),
                                            expectedrows=self.NEVENTS)

        self.sipmrd = self.h5f.create_earray(self.h5f.root,
                                             "sipmrd",
                                             atom=tables.IntAtom(),
                                             shape=(0, self.NSIPM,
                                                    self.LEN_SIPM),
                                             expectedrows=self.NEVENTS)

        group = self.h5f.create_group(self.h5f.root, "Detector")
        self.geom_table = self.h5f.create_table(group, "DetectorGeometry",
                                                DetectorGeometry,
                                                "DetectorGeometry",
                                                tables.Filters(0))

        group = self.h5f.create_group(self.h5f.root, "Sensors")
        self.pmt_table = self.h5f.create_table(group, "DataPMT", DataPMT,
                                               "DataPMT", tables.Filters(0))
        self.sipm_table = self.h5f.create_table(group, "DataSiPM", DataSiPM,
                                                "DataSiPM", tables.Filters(0))

        group = self.h5f.create_group(self.h5f.root, "MC")
        self.MCTrack_table = self.h5f.create_table(group, "MCTracks",
                                                   MCTrack, "MCTracks",
                                                   tables.Filters(0))
        # self.h5f = tables.open_file("pmtrd.h5", "a",
        #                     filters=tables.Filters(complib="blosc", complevel=9))
        #self.pmt = self.h5f.create_table(self.h5f.root, "pmt", PMTRD)

        self.t = time()
        self.n_evt = 0

        return
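For reference, a minimal standalone sketch of the create_earray pattern used above (hypothetical file name and sizes; the zero-length first dimension is the one that grows on append):

import numpy as np
import tables

with tables.open_file("waveforms_demo.h5", "w") as f:  # hypothetical file
    pmtrd = f.create_earray(f.root, "pmtrd",
                            atom=tables.IntAtom(),
                            shape=(0, 12, 100),  # extendable along axis 0
                            expectedrows=10)
    # Append one event's worth of waveforms at a time.
    pmtrd.append(np.zeros((1, 12, 100), dtype=np.int32))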
Example #2
def init_output(outfile):
    """
    Creates the output HDF5 file with extendable arrays for incremental writing.
    """
    with tables.open_file(outfile, 'w') as f:
        ## Create extendable arrays so we can incrementally write output
        f.create_earray(f.root, 'ancs', atom=tables.IntAtom(), shape=(0, ))
        f.create_earray(f.root, 'liks', atom=tables.FloatAtom(), shape=(0, ))

        ## Trees, which are variable-length, must be added individually
        f.create_vlarray(f.root, 'trees', atom=tables.IntAtom())
        f.create_vlarray(f.root, 'genotypes', atom=tables.IntAtom())
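Because the arrays are extendable, results can then be written incrementally by reopening the file in append mode. A minimal sketch (the layout is the one created above; values are hypothetical):

import tables

def append_result(outfile, anc, lik, tree, genotype):
    # EArray.append takes a batch of rows; VLArray.append takes one
    # variable-length row at a time.
    with tables.open_file(outfile, 'a') as f:
        f.root.ancs.append([anc])
        f.root.liks.append([lik])
        f.root.trees.append(tree)          # e.g. a list of ints
        f.root.genotypes.append(genotype)  # e.g. a list of ints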
Example #3
def populate(f, nlevels):
    g = f.root
    #arr = numpy.zeros((10,), "f4")
    #descr = {'f0': tables.Int32Col(), 'f1': tables.Float32Col()}
    for i in range(nlevels):
        #dset = f.create_array(g, "DS1", arr)
        #dset = f.create_array(g, "DS2", arr)
        f.create_carray(g, "DS1", tb.IntAtom(), (10,))
        f.create_carray(g, "DS2", tb.IntAtom(), (10,))
        #dset = f.create_table(g, "DS1", descr)
        #dset = f.create_table(g, "DS2", descr)
        f.create_group(g, 'group2_')
        g = f.create_group(g, 'group')
Example #4
def open_or_create_dataset_file(filename, filters, groups, add_classes):
	if os.path.exists(filename):
		return tables.open_file(filename, mode='r+')
	file = tables.open_file(filename, mode='w')
	for i in groups:
		file.create_group(file.root, i)
	file.create_carray(file.root, 'count', atom=tables.IntAtom(), shape=(1,), filters=filters)
	if add_classes:
		file.create_earray(file.root, 'classes', atom=tables.StringAtom(25), shape=(0,), filters=filters)
	file.create_earray(file.root, 'train', atom=tables.IntAtom(), shape=(0,), filters=filters)
	file.create_earray(file.root, 'test', atom=tables.IntAtom(), shape=(0,), filters=filters)
	file.root.count[0] = 0
	return file
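A possible call site (hypothetical file name, filters, and groups), showing that the same function either creates the layout or reopens an existing file:

import tables

filters = tables.Filters(complib='blosc', complevel=5)
f = open_or_create_dataset_file('dataset.h5', filters,
                                groups=['images', 'labels'],
                                add_classes=True)
try:
    f.root.train.append([0, 1, 2])  # record some training sample indices
finally:
    f.close()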
Example #5
def populate(f, nlevels):
    g = f.root
    arr = numpy.zeros((10, ), "f4")
    recarr = numpy.zeros((10, ), "i4,f4")
    descr = {'f0': tables.Int32Col(), 'f1': tables.Float32Col()}
    for i in range(nlevels):
        #dset = f.createArray(g, "DS1", arr)
        #dset = f.createArray(g, "DS2", arr)
        dset = f.createCArray(g, "DS1", tables.IntAtom(), (10, ))
        dset = f.createCArray(g, "DS2", tables.IntAtom(), (10, ))
        #dset = f.createTable(g, "DS1", descr)
        #dset = f.createTable(g, "DS2", descr)
        group2 = f.createGroup(g, 'group2_')
        g = f.createGroup(g, 'group')
Example #6
    def __init__(self, database: AbstractDB):
        """
        Initialize the atoms for meta-data (types, valid tag, and splits)
        Args:
            database (AbstractDB): Associated Database object
        """
        super().__init__(database)
        self.filename_atom = tables.StringAtom(itemsize=255)
        self.types_atom = tables.StringAtom(itemsize=255)
        # whether the patch is valid.
        self.valid_atom = tables.BoolAtom(shape=(), dflt=False)
        # save the meta info: split
        # noinspection PyArgumentList
        self.file_list_atom = tables.StringAtom(itemsize=get_path_limit())
        # noinspection PyArgumentList
        self.split_atom = tables.IntAtom(shape=(), dflt=0)

        self.hdf5_organizer = H5Organizer(self.database,
                                          self.database.group_level)
        self.data_extractor = DataExtractor(self.database)
        self.weight_writer = WeightCollector(
            self.database,
            self.data_extractor,
            weight_counter=self.database.weight_counter_callable)
        self.data_size = {}
Example #7
    def test99b_nonScalarEnum(self):
        """Describing an enumerated column of non-scalars (not implemented)."""
        colors = {'red': (1, 2, 3)}
        self.assertRaises(NotImplementedError,
                          self._createCol,
                          colors,
                          'red',
                          base=tables.IntAtom(shape=3))
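By contrast, scalar enumerated columns are supported. A minimal sketch of the working counterpart, assuming PyTables' Enum/EnumCol and a hypothetical table:

import tables

colors = tables.Enum(['red', 'green', 'blue'])

class Painted(tables.IsDescription):
    # Scalar enum column: default 'red', stored as an unsigned 8-bit int.
    color = tables.EnumCol(colors, 'red', base='uint8')

with tables.open_file('painted.h5', 'w') as f:  # hypothetical file
    t = f.create_table(f.root, 'painted', Painted)
    row = t.row
    row['color'] = colors.green  # store the concrete value for 'green'
    row.append()
    t.flush()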
Example #8
def write_carray(file, nchildren, niter):
    for i in range(niter):
        fileh = tables.openFile(file, mode="w")
        for child in range(nchildren):
            fileh.createCArray(fileh.root, 'array' + str(child),
                               tables.IntAtom(), (2, ), "child: %d" % child)
        show_mem("After creating. Iter %s" % i)
        fileh.close()
        show_mem("After close")
Example #9
def write_vlarray(file, nchildren, niter):
    for i in range(niter):
        fileh = tables.openFile(file, mode="w")
        for child in range(nchildren):
            vl = fileh.createVLArray(fileh.root, 'array' + str(child),
                                     tables.IntAtom(), "child: %d" % child)
            vl.append([1, 2, 3])
        show_mem("After creating. Iter %s" % i)
        fileh.close()
        show_mem("After close")
Example #10
def write_earray(file, nchildren, niter):
    for i in range(niter):
        fileh = tables.open_file(file, mode="w")
        for child in range(nchildren):
            ea = fileh.create_earray(fileh.root, 'array' + str(child),
                                     tables.IntAtom(), shape=(0,),
                                     title="child: %d" % child)
            ea.append([1, 2, 3])
        show_mem("After creating. Iter %s" % i)
        fileh.close()
        show_mem("After close")
Example #11
    def create_db(filename, params, total_env_count=None, traj_per_env=None):
        """
        :param filename: file name for database
        :param params: dotdict describing the domain
        :param total_env_count: total number of environments in the dataset (helps to preallocate space)
        :param traj_per_env: number of trajectories per environment
        """
        N = params.grid_n
        M = params.grid_m
        num_state = N * M
        if total_env_count is not None and traj_per_env is not None:
            total_traj_count = total_env_count * traj_per_env
        else:
            total_traj_count = 0

        if os.path.isfile(filename):
            print (filename + " already exists, opening.")
            return tables.open_file(filename, mode='a')

        db = tables.open_file(filename, mode='w')

        db.create_earray(db.root, 'envs', tables.IntAtom(), shape=(0, N, M), expectedrows=total_env_count)

        db.create_earray(db.root, 'expRs', tables.FloatAtom(), shape=(0, ), expectedrows=total_traj_count)

        db.create_earray(db.root, 'valids', tables.IntAtom(), shape=(0, ), expectedrows=total_traj_count)

        db.create_earray(db.root, 'bs', tables.FloatAtom(), shape=(0, num_state), expectedrows=total_traj_count)

        db.create_earray(db.root, 'steps', tables.IntAtom(),
                         shape=(0, 3),  # state,  action, observation
                         expectedrows=total_traj_count * 10) # rough estimate

        db.create_earray(db.root, 'samples', tables.IntAtom(),
                         shape=(0, 6),  # env_id, goal_state, step_id, traj_length, collisions, failed
                         expectedrows=total_traj_count)
        db.create_earray(db.root, 'qmdpBeliefs', tables.FloatAtom(),
                         shape=(0, num_state), expectedrows=total_traj_count * 10)
        return db
Example #12
def init_h5_result_file(h5, expectedrows=50000):
    """
    Receives a h5 file that has just been created,
    creates the proper arrays:
     - query
     - target
     - position
     - n_results
    """
    group = h5.createGroup("/",'results','general, sole group')
    h5.createEArray(group,'query',tables.StringAtom(18,shape=()),(0,),
                    'tid of the query',
                    expectedrows=expectedrows)
    h5.createEArray(group,'target',tables.StringAtom(18,shape=()),(0,),
                    'tid of the target',
                    expectedrows=expectedrows)
    h5.createEArray(group,'pos',tables.IntAtom(shape=()),(0,),
                    'position of the target in the result list',
                    expectedrows=expectedrows)
    h5.createEArray(group,'n_results',tables.IntAtom(shape=()),(0,),
                    'length of the result list returned by query',
                    expectedrows=expectedrows)
    # done
    return
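This example uses the PyTables 2.x camelCase API (createGroup, createEArray); PyTables 3.0 renamed these methods to PEP 8 style names and deprecated the old spellings. A sketch of the same layout with the modern names (hypothetical function name):

import tables

def init_h5_result_file_modern(h5, expectedrows=50000):
    # Same structure as above, written against the PyTables >= 3.0 API.
    group = h5.create_group('/', 'results', 'general, sole group')
    h5.create_earray(group, 'query', tables.StringAtom(18), (0,),
                     'tid of the query', expectedrows=expectedrows)
    h5.create_earray(group, 'target', tables.StringAtom(18), (0,),
                     'tid of the target', expectedrows=expectedrows)
    h5.create_earray(group, 'pos', tables.IntAtom(), (0,),
                     'position of the target in the result list',
                     expectedrows=expectedrows)
    h5.create_earray(group, 'n_results', tables.IntAtom(), (0,),
                     'length of the result list returned by query',
                     expectedrows=expectedrows)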
Example #13
    def test19_getCutNodes(self):
        """Check the getCutNodes method.
        """

        # Add a couple of nodes to the hidden group
        tmp_db = VTAPP.dbManager.getDB(
            VTAPP.dbManager.tmp_filepath).getH5File()
        tmp_db.createGroup('/_p_cutNode', 'Group_A')
        tmp_db.createCArray('/_p_cutNode', 'Hidden_CArray', tables.IntAtom(),
                            (3, 3))
        tmp_db.flush()
        cut_nodes = VTAPP.dbManager.getCutNodes()
        cut_nodes.sort()
        expected = ['Group_A', 'Hidden_CArray']
        self.assertEqual(cut_nodes, expected,
                         'The retrieved list of cut nodes is wrong')
Example #14
def combine_partial_contribs(partial_files, outfile):
    """
    Combines the sparse results of several allele dropping simulations
    into a single sparse array.
    """
    ## Dict of sparse contribs per region
    region_contribs = defaultdict(initialize_sparse_array)

    for pf in partial_files:
        print "Loading contribs from", pf
        with tables.open_file(pf, 'r') as f:
            ## Iterate through each region node in the sparse file
            for partial_node in f.list_nodes(f.root.sparse_hist):
                ## Load sparse data
                region = partial_node.name
                r, c, d = np.transpose(partial_node[:])

                ## Add results to total
                new_tot = append_sparse(region_contribs[region], r, c, d)
                region_contribs[region] = new_tot

    ## Write results to file
    with tables.open_file(outfile, 'w') as f:
        filters = tables.Filters(complevel=5, complib='blosc')
        g = f.create_group(f.root, 'sparse_hist')

        for region, contribs in region_contribs.iteritems():
            print "Writing contribs for region:", region
            ## Transpose data so max row size is not exceeded
            r, c, d = contribs.row, contribs.col, contribs.data
            contribs_array = np.transpose(np.vstack([r, c, d]))

            ## Store in compressed array
            ca = f.create_carray(g,
                                 region.strip(),
                                 tables.IntAtom(),
                                 shape=(contribs_array.shape),
                                 filters=filters)
            ca[:] = contribs_array

            ## Store number of inds in region
            with tables.open_file(partial_files[0], 'r') as pf:
                raw_node = pf.get_node(pf.root.raw, str(region))
                ninds_region = raw_node._v_attrs['ninds']
            f.set_node_attr(ca, 'ninds', ninds_region)
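To use the merged results, the stored (row, col, data) triplets can be read back into sparse matrices. A sketch assuming SciPy is available (infile is hypothetical):

import numpy as np
import tables
from scipy.sparse import coo_matrix

def load_contribs(infile):
    # Rebuild one COO matrix per region from the stored triplets.
    contribs = {}
    with tables.open_file(infile, 'r') as f:
        for node in f.list_nodes(f.root.sparse_hist):
            r, c, d = np.transpose(node[:])
            contribs[node.name] = coo_matrix((d, (r, c)))
    return contribs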
Example #15
def create_hdf_arrays(file_name, ports, dig_in, emg_port, emg_channels):
    hf5 = tables.open_file(file_name, 'r+')
    n_electrodes = len(ports) * 32
    atom = tables.IntAtom()

    # Create arrays for digital inputs
    for i in dig_in:
        dig_inputs = hf5.create_earray('/digital_in', 'dig_in_%i' % i, atom,
                                       (0, ))

    # Create arrays for neural electrodes, and make directories to store stuff coming out from blech_process
    for i in range(n_electrodes - len(emg_channels)):
        el = hf5.create_earray('/raw', 'electrode%i' % i, atom, (0, ))

    # Create arrays for EMG electrodes
    for i in range(len(emg_channels)):
        el = hf5.create_earray('/raw_emg', 'emg%i' % i, atom, (0, ))

    # Close the hdf5 file
    hf5.close()
Example #16
    def execute(self, context):
        scene = context.scene
        trkr = bpy.data.objects['tracker1'] # this script looks for an object named tracker1

        fp = scene.render.filepath # get existing output path

        handler = lose.Loser(os.path.normpath(f'{fp}/ground_truth.h5'))

        handler.new_group(fmode='w', mat44=(4, 4), pos=(3,), rot_q=(4,))
        handler.new_group(atom=t.IntAtom(), frame_id=(1,))

        print ('starting to gather data\n\nframes will be saved to "' + os.path.normpath(f'{fp}/frames/') + '"')

        print ('this handler will be used to save data:')
        print (handler)

        print ('starting render...\n')

        scene.render.filepath = fp
        try:
            with handler:
                # sequence length is pulled from blender animation duration settings
                for i in range(scene.frame_start, scene.frame_end+1, scene.frame_step):
                    scene.frame_set(i)
                    scene.render.filepath = os.path.normpath(f'{fp}/frames/{i}')
                    bpy.ops.wm.redraw_timer(type='DRAW_WIN_SWAP', iterations=1, time_limit=1/1000)

                    mat_temp = trkr.matrix_world
                    # save tracker data
                    handler.save(mat44=[np.array(mat_temp)], pos=[np.array(mat_temp.to_translation())], rot_q=[np.array(mat_temp.to_quaternion())], frame_id=[[i]])
                    print (f'frame {i} data saved')

                    bpy.ops.render.render(write_still=True) # render still

        finally:
            scene.render.filepath = fp

        print ('data grab done')
        return {'FINISHED'}
Example #17
def process_filelist_train(filelist=None,
                           testartists=None,
                           tmpfilename=None,
                           npicks=None,
                           winsize=None,
                           finaldim=None,
                           typecompress='picks'):
    """
    Main function, process all files in the list (as long as their artist
    is not in testartist)
    INPUT
       filelist     - a list of song files
       testartists  - set of artist ID that we should not use
       tmpfilename  - where to save our processed features
       npicks       - number of segments to pick per song
       winsize      - size of each segment we pick
       finaldim     - how many values do we keep
       typecompress - one of 'picks' (win of btchroma), 'corrcoeff' (correlation
                      coefficients), 'cov' (covariance) or 'avgcov' (averaged covariance)
    """
    # sanity check
    for arg in locals().values():
        assert not arg is None, 'process_filelist_train, missing an argument, something still None'
    if os.path.isfile(tmpfilename):
        print 'ERROR: file', tmpfilename, 'already exists.'
        return
    # create outputfile
    output = tables.openFile(tmpfilename, mode='a')
    group = output.createGroup("/", 'data', 'TMP FILE FOR YEAR RECOGNITION')
    output.createEArray(group,
                        'feats',
                        tables.Float64Atom(shape=()), (0, finaldim),
                        '',
                        expectedrows=len(filelist))
    output.createEArray(group,
                        'year',
                        tables.IntAtom(shape=()), (0, ),
                        '',
                        expectedrows=len(filelist))
    output.createEArray(group,
                        'track_id',
                        tables.StringAtom(18, shape=()), (0, ),
                        '',
                        expectedrows=len(filelist))
    # random projection
    ndim = 12  # fixed in this dataset
    if typecompress == 'picks':
        randproj = RANDPROJ.proj_point5(ndim * winsize, finaldim)
    elif typecompress == 'corrcoeff' or typecompress == 'cov':
        randproj = RANDPROJ.proj_point5(ndim * ndim, finaldim)
    elif typecompress == 'avgcov':
        randproj = RANDPROJ.proj_point5(90, finaldim)
    else:
        assert False, 'Unknown type of compression: ' + str(typecompress)
    # iterate over files
    cnt_f = 0
    for f in filelist:
        cnt_f += 1
        # verbose
        if cnt_f % 50000 == 0:
            print 'training... checking file #', cnt_f
        # check file
        h5 = GETTERS.open_h5_file_read(f)
        artist_id = GETTERS.get_artist_id(h5)
        year = GETTERS.get_year(h5)
        track_id = GETTERS.get_track_id(h5)
        h5.close()
        if year <= 0 or artist_id in testartists:
            continue
        # we have a train artist with a song year, we're good
        bttimbre = get_bttimbre(f)
        if typecompress == 'picks':
            if bttimbre is None:
                continue
            # we even have normal features, awesome!
            processed_feats = CBTF.extract_and_compress(bttimbre,
                                                        npicks,
                                                        winsize,
                                                        finaldim,
                                                        randproj=randproj)
        elif typecompress == 'corrcoeff':
            h5 = GETTERS.open_h5_file_read(f)
            timbres = GETTERS.get_segments_timbre(h5).T
            h5.close()
            processed_feats = CBTF.corr_and_compress(timbres,
                                                     finaldim,
                                                     randproj=randproj)
        elif typecompress == 'cov':
            h5 = GETTERS.open_h5_file_read(f)
            timbres = GETTERS.get_segments_timbre(h5).T
            h5.close()
            processed_feats = CBTF.cov_and_compress(timbres,
                                                    finaldim,
                                                    randproj=randproj)
        elif typecompress == 'avgcov':
            h5 = GETTERS.open_h5_file_read(f)
            timbres = GETTERS.get_segments_timbre(h5).T
            h5.close()
            processed_feats = CBTF.avgcov_and_compress(timbres,
                                                       finaldim,
                                                       randproj=randproj)
        else:
            assert False, 'Unknown type of compression: ' + str(typecompress)
        # save them to tmp file
        n_p_feats = processed_feats.shape[0]
        output.root.data.year.append(np.array([year] * n_p_feats))
        output.root.data.track_id.append(np.array([track_id] * n_p_feats))
        output.root.data.feats.append(processed_feats)
    # we're done, close output
    output.close()
    return
Example #18
    sys.exit()

climb_file_paths = sys.argv[1]
climb_merge_file = sys.argv[2]
control_file_paths = sys.argv[3]
control_merge_file = sys.argv[4]

climb_files = sorted([line.strip() for line in open(climb_file_paths)])
control_files = sorted([line.strip() for line in open(control_file_paths)])

assert len(climb_files) == len(control_files)

with tables.open_file(control_merge_file, 'w') as control_merge_file:
    control_liks = tables.EArray(control_merge_file.root,
                                 'control_liks',
                                 tables.IntAtom(),
                                 shape=(0, ))
    tot_liks = tables.EArray(control_merge_file.root,
                             'tot_liks',
                             tables.IntAtom(),
                             shape=(0, ))

    for fname in control_files:
        print("Merging", fname)
        with tables.open_file(fname, 'r') as f:
            control_liks.append(f.root.control_liks[:])
            tot_liks.append(f.root.tot_liks[:])

with tables.open_file(climb_merge_file, 'w') as climb_merge_file:
    ancs = tables.EArray(climb_merge_file.root,
                         'ancs',
Example #19
# Open a file in "w"rite mode
fileh = tables.open_file("MDobjects.h5", mode="w")
# Create the table with compression 'on' in order to reduce size as
# much as possible
table = fileh.create_table(fileh.root,
                           'table',
                           Particle,
                           "A table",
                           filters=tables.Filters(complevel=1))
# Append several rows with default values
for i in range(10):
    table.row.append()
table.flush()

# create new arrays
atom1 = tables.IntAtom()
shape1 = (2, 10, 10, 1)
filters1 = tables.Filters(complevel=1)
#(2, 10, 10, 3)
array1 = fileh.create_carray(fileh.root,
                             'array1',
                             atom1,
                             shape1,
                             filters=filters1)
atom2 = tables.FloatAtom()
shape2 = (2, 10, 10, 3, 1)
filters2 = tables.Filters(complevel=1)
#(2, 10, 10, 3, 200)
array2 = fileh.create_carray(fileh.root,
                             'array2',
                             atom2,
Example #20
def create_all_arrays(h5, expectedrows=1000):
    """
    Utility function used by both create_song_file and create_aggregate_files;
    creates all the EArrays (empty).
    INPUT
       h5   - hdf5 file, open with write or append permissions
              metadata and analysis groups already exist!
    """
    # group metadata arrays
    group = h5.root.metadata
    h5.createEArray(where=group,
                    name='similar_artists',
                    atom=tables.StringAtom(20, shape=()),
                    shape=(0, ),
                    title=ARRAY_DESC_SIMILAR_ARTISTS)
    h5.createEArray(group,
                    'artist_terms',
                    tables.StringAtom(256, shape=()), (0, ),
                    ARRAY_DESC_ARTIST_TERMS,
                    expectedrows=expectedrows * 40)
    h5.createEArray(group,
                    'artist_terms_freq',
                    tables.Float64Atom(shape=()), (0, ),
                    ARRAY_DESC_ARTIST_TERMS_FREQ,
                    expectedrows=expectedrows * 40)
    h5.createEArray(group,
                    'artist_terms_weight',
                    tables.Float64Atom(shape=()), (0, ),
                    ARRAY_DESC_ARTIST_TERMS_WEIGHT,
                    expectedrows=expectedrows * 40)
    # group analysis arrays
    group = h5.root.analysis
    h5.createEArray(where=group,
                    name='segments_start',
                    atom=tables.Float64Atom(shape=()),
                    shape=(0, ),
                    title=ARRAY_DESC_SEGMENTS_START)
    h5.createEArray(group,
                    'segments_confidence',
                    tables.Float64Atom(shape=()), (0, ),
                    ARRAY_DESC_SEGMENTS_CONFIDENCE,
                    expectedrows=expectedrows * 300)
    h5.createEArray(group,
                    'segments_pitches',
                    tables.Float64Atom(shape=()), (0, 12),
                    ARRAY_DESC_SEGMENTS_PITCHES,
                    expectedrows=expectedrows * 300)
    h5.createEArray(group,
                    'segments_timbre',
                    tables.Float64Atom(shape=()), (0, 12),
                    ARRAY_DESC_SEGMENTS_TIMBRE,
                    expectedrows=expectedrows * 300)
    h5.createEArray(group,
                    'segments_loudness_max',
                    tables.Float64Atom(shape=()), (0, ),
                    ARRAY_DESC_SEGMENTS_LOUDNESS_MAX,
                    expectedrows=expectedrows * 300)
    h5.createEArray(group,
                    'segments_loudness_max_time',
                    tables.Float64Atom(shape=()), (0, ),
                    ARRAY_DESC_SEGMENTS_LOUDNESS_MAX_TIME,
                    expectedrows=expectedrows * 300)
    h5.createEArray(group,
                    'segments_loudness_start',
                    tables.Float64Atom(shape=()), (0, ),
                    ARRAY_DESC_SEGMENTS_LOUDNESS_START,
                    expectedrows=expectedrows * 300)
    h5.createEArray(group,
                    'sections_start',
                    tables.Float64Atom(shape=()), (0, ),
                    ARRAY_DESC_SECTIONS_START,
                    expectedrows=expectedrows * 300)
    h5.createEArray(group,
                    'sections_confidence',
                    tables.Float64Atom(shape=()), (0, ),
                    ARRAY_DESC_SECTIONS_CONFIDENCE,
                    expectedrows=expectedrows * 300)
    h5.createEArray(group,
                    'beats_start',
                    tables.Float64Atom(shape=()), (0, ),
                    ARRAY_DESC_BEATS_START,
                    expectedrows=expectedrows * 300)
    h5.createEArray(group,
                    'beats_confidence',
                    tables.Float64Atom(shape=()), (0, ),
                    ARRAY_DESC_BEATS_CONFIDENCE,
                    expectedrows=expectedrows * 300)
    h5.createEArray(group,
                    'bars_start',
                    tables.Float64Atom(shape=()), (0, ),
                    ARRAY_DESC_BARS_START,
                    expectedrows=expectedrows * 300)
    h5.createEArray(group,
                    'bars_confidence',
                    tables.Float64Atom(shape=()), (0, ),
                    ARRAY_DESC_BARS_CONFIDENCE,
                    expectedrows=expectedrows * 300)
    h5.createEArray(group,
                    'tatums_start',
                    tables.Float64Atom(shape=()), (0, ),
                    ARRAY_DESC_TATUMS_START,
                    expectedrows=expectedrows * 300)
    h5.createEArray(group,
                    'tatums_confidence',
                    tables.Float64Atom(shape=()), (0, ),
                    ARRAY_DESC_TATUMS_CONFIDENCE,
                    expectedrows=expectedrows * 300)
    # group musicbrainz arrays
    group = h5.root.musicbrainz
    h5.createEArray(where=group,
                    name='artist_mbtags',
                    atom=tables.StringAtom(256, shape=()),
                    shape=(0, ),
                    title=ARRAY_DESC_ARTIST_MBTAGS,
                    expectedrows=expectedrows * 5)
    h5.createEArray(group,
                    'artist_mbtags_count',
                    tables.IntAtom(shape=()), (0, ),
                    ARRAY_DESC_ARTIST_MBTAGS_COUNT,
                    expectedrows=expectedrows * 5)
Example #21
def opensourcefile(k, filename=None, sourcetype=None, overwrite=False):
    """Open the source term hdf5 file with filename."""
    import tables
    #Set up file for results
    if not filename or not os.path.isdir(os.path.dirname(filename)):
        source_logger.info("File or path to file %s does not exist." %
                           filename)
        date = time.strftime("%Y%m%d%H%M%S")
        filename = os.path.join(os.getcwd(), "src" + date + ".hf5")
        source_logger.info("Saving source results in file " + filename)
    if not sourcetype:
        raise TypeError(
            "Need to specify filename and type of source data to store [int(egrand)|(full)term]!"
        )
    if sourcetype in ["int", "term"]:
        sarrname = "source" + sourcetype
        if _debug:
            source_logger.debug("Source array type: " + sarrname)
    else:
        raise TypeError("Incorrect source type specified!")
    #Check if file exists and set write flags depending on overwrite option
    if os.path.isfile(filename):
        if overwrite:

            source_logger.info("File %s exists and will be overwritten." %
                               filename)
            writeflag = "w"
        else:
            source_logger.info("File %s exists and results will be appended." %
                               filename)
            writeflag = "a"
    else:
        writeflag = "w"

    #Add compression to files and specify good chunkshape
    filters = tables.Filters(complevel=1, complib=configuration.hdf5complib)
    #cshape = (10,10,10) #good mix of t, k, q values
    #Get atom shape for earray
    atomshape = (0, len(k))
    try:
        if _debug:
            source_logger.debug("Trying to open source file " + filename)
        rf = tables.openFile(filename, writeflag, "Source term result")
        if not "results" in rf.root:
            if _debug:
                source_logger.debug("Creating group 'results' in source file.")
            resgrp = rf.createGroup(rf.root, "results", "Results")
        else:
            resgrp = rf.root.results
        if not sarrname in resgrp:
            if _debug:
                source_logger.debug("Creating array '" + sarrname +
                                    "' in source file.")
            sarr = rf.createEArray(resgrp,
                                   sarrname,
                                   tables.ComplexAtom(itemsize=16),
                                   atomshape,
                                   filters=filters)
            karr = rf.createEArray(resgrp,
                                   "k",
                                   tables.Float64Atom(), (0, ),
                                   filters=filters)
            narr = rf.createEArray(resgrp,
                                   "nix",
                                   tables.IntAtom(), (0, ),
                                   filters=filters)
            karr.append(k)
        else:
            if _debug:
                source_logger.debug(
                    "Source file and node exist. Testing source node shape...")
            sarr = rf.getNode(resgrp, sarrname)
            narr = rf.getNode(resgrp, "nix")
            if sarr.shape[1:] != atomshape[1:]:
                raise ValueError("Source node on file is not correct shape!")
    except IOError:
        raise
    return rf, sarr, narr
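A sketch of how the returned handles might be used (hypothetical k grid and dummy rows), appending one source-term row per step and closing the file when done:

import numpy as np

k = np.linspace(1e-4, 1e-2, 50)  # hypothetical k grid

rf, sarr, narr = opensourcefile(k, filename=None, sourcetype="term")
try:
    for nix in range(10):
        row = np.zeros((1, len(k)), dtype=np.complex128)  # dummy source term
        sarr.append(row)
        narr.append([nix])
finally:
    rf.close()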
Example #22
        sequence_features=sequence_features)

    return contexts, features


dataset = TFRecordDataset(files)

dataset = dataset.map(_parse_function)

iterator = dataset.make_one_shot_iterator()

data_shape = (0, )
labels_shape = (0, )

sound_dtype = tables.StringAtom(itemsize=128)
labels_dtype = tables.IntAtom()

data_storage = hdf5_file.create_earray(hdf5_file.root,
                                       'audio_embedding',
                                       sound_dtype,
                                       shape=data_shape)
labels_storage = hdf5_file.create_earray(hdf5_file.root,
                                         'labels',
                                         labels_dtype,
                                         shape=labels_shape)

value = iterator.get_next()
i = 1
with tf.Session() as sess:
    while 1:
        try:
Example #23
climb_lik_file = '/RQusagers/dnelson/project/anc_finder/results/BALSAC/CAID_3M_all_anc_out.csv'

max_trees = 1000

print "Loading climbing likelihoods"
climb_liks = np.genfromtxt(climb_lik_file,
                           skip_header=True,
                           delimiter=',',
                           usecols=[1])[:max_trees]
print "Loading trees"
trees = [line.strip() for line in open(tree_anc_file, 'r')][:max_trees]

climb_outfile = os.path.expanduser('~/temp/CAID_climb_1000.h5')
with tables.open_file(climb_outfile, 'w') as f:
    ## Create extendable arrays so we can incrementally write output
    f.create_earray(f.root, 'liks', atom=tables.FloatAtom(), shape=(0, ))
    f.create_earray(f.root, 'ancs', atom=tables.FloatAtom(), shape=(0, ))

    ## Trees, which are variable-length, must be added individually
    f.create_vlarray(f.root, 'trees', atom=tables.IntAtom())
    f.create_vlarray(f.root, 'genotypes', atom=tables.IntAtom())

incremental_write(climb_liks, trees, climb_outfile)

## Store control likelihoods
# control_outfile = os.path.expanduser('~/temp/CAID_control.h5')
#
# init_array(control_outfile, array_name='control_liks')
# with tables.open_file(control_outfile, 'a') as f:
#     f.root.control_liks.append([np.log2(conv_prob)])
Example #24
def process_filelist_test(filelist=None,
                          model=None,
                          tmpfilename=None,
                          npicks=None,
                          winsize=None,
                          finaldim=None,
                          K=1,
                          typecompress='picks'):
    """
    Main function, process all files in the list (as long as their artist
    is in testartist)
    INPUT
       filelist     - a list of song files
       model        - h5 file containing feats and year for all train songs
       tmpfilename  - where to save our processed features
       npicks       - number of segments to pick per song
       winsize      - size of each segment we pick
       finaldim     - how many values do we keep
       K            - param of KNN (default 1)
       typecompress - feature type: 'picks', 'corrcoeff', 'cov' or 'avgcov';
                      must be the same as in training
    """
    # sanity check
    for arg in locals().values():
        assert not arg is None, 'process_filelist_test, missing an argument, something still None'
    if os.path.isfile(tmpfilename):
        print 'ERROR: file', tmpfilename, 'already exists.'
        return
    if not os.path.isfile(model):
        print 'ERROR: model', model, 'does not exist.'
        return
    # create kdtree
    h5model = tables.openFile(model, mode='r')
    assert h5model.root.data.feats.shape[1] == finaldim, \
        'inconsistency in final dim'
    kd = ANN.kdtree(h5model.root.data.feats)
    # create outputfile
    output = tables.openFile(tmpfilename, mode='a')
    group = output.createGroup("/", 'data', 'TMP FILE FOR YEAR RECOGNITION')
    output.createEArray(group,
                        'year_real',
                        tables.IntAtom(shape=()), (0, ),
                        '',
                        expectedrows=len(filelist))
    output.createEArray(group,
                        'year_pred',
                        tables.Float64Atom(shape=()), (0, ),
                        '',
                        expectedrows=len(filelist))
    # random projection
    ndim = 12  # fixed in this dataset
    if typecompress == 'picks':
        randproj = RANDPROJ.proj_point5(ndim * winsize, finaldim)
    elif typecompress == 'corrcoeff' or typecompress == 'cov':
        randproj = RANDPROJ.proj_point5(ndim * ndim, finaldim)
    elif typecompress == 'avgcov':
        randproj = RANDPROJ.proj_point5(90, finaldim)
    else:
        assert False, 'Unknown type of compression: ' + str(typecompress)
    # go through files
    cnt_f = 0
    for f in filelist:
        cnt_f += 1
        if cnt_f % 5000 == 0:
            print 'TESTING FILE #' + str(cnt_f)
        # check file
        h5 = GETTERS.open_h5_file_read(f)
        artist_id = GETTERS.get_artist_id(h5)
        year = GETTERS.get_year(h5)
        track_id = GETTERS.get_track_id(h5)
        h5.close()
        if year <= 0:  # probably useless but...
            continue
        if typecompress == 'picks':
            # we have a train artist with a song year, we're good
            bttimbre = get_bttimbre(f)
            if bttimbre is None:
                continue
            # we even have normal features, awesome!
            processed_feats = CBTF.extract_and_compress(bttimbre,
                                                        npicks,
                                                        winsize,
                                                        finaldim,
                                                        randproj=randproj)
        elif typecompress == 'corrcoeff':
            h5 = GETTERS.open_h5_file_read(f)
            timbres = GETTERS.get_segments_timbre(h5).T
            h5.close()
            processed_feats = CBTF.corr_and_compress(timbres,
                                                     finaldim,
                                                     randproj=randproj)
        elif typecompress == 'cov':
            h5 = GETTERS.open_h5_file_read(f)
            timbres = GETTERS.get_segments_timbre(h5).T
            h5.close()
            processed_feats = CBTF.cov_and_compress(timbres,
                                                    finaldim,
                                                    randproj=randproj)
        elif typecompress == 'avgcov':
            h5 = GETTERS.open_h5_file_read(f)
            timbres = GETTERS.get_segments_timbre(h5).T
            h5.close()
            processed_feats = CBTF.avgcov_and_compress(timbres,
                                                       finaldim,
                                                       randproj=randproj)
        else:
            assert False, 'Unknown type of compression: ' + str(typecompress)
        if processed_feats is None:
            continue
        if processed_feats.shape[0] == 0:
            continue
        # do prediction
        year_pred = do_prediction(processed_feats, kd, h5model, K)
        # add pred and ground truth to output
        if year_pred is not None:
            output.root.data.year_real.append([year])
            output.root.data.year_pred.append([year_pred])
    # close output and model
    del kd
    h5model.close()
    output.close()
    # done
    return
Example #25
    def initialize_database(self, **kargs):
        """
        Initializes the EventDatabase.  Adds a group 'events' with
        table 'eventTable' and matrices 'raw_data', 'levels', and 'level_lengths'.

        :param kargs: Dictionary - includes:
                        -maxEventLength: Maximum number of datapoints for an event to be added.
        """
        if 'maxEventLength' in kargs:
            if kargs['maxEventLength'] > self.max_event_length:
                self.max_event_length = kargs['maxEventLength']
        if 'events' not in self.root:
            self.createGroup(self.root, 'events', 'Events')

        if not 'eventTable' in self.root.events:
            self.createTable(self.root.events, 'eventTable', _Event,
                             'Event parameters')
            self.event_row = None

        filters = tb.Filters(complib='blosc', complevel=4)
        shape = (0, self.max_event_length)
        a = tb.FloatAtom()
        b = tb.IntAtom()

        if not 'raw_data' in self.root.events:
            self.createEArray(self.root.events,
                              'raw_data',
                              a,
                              shape=shape,
                              title="Raw data points",
                              filters=filters)

        if not 'levels' in self.root.events:
            self.createEArray(self.root.events,
                              'levels',
                              a,
                              shape=shape,
                              title="Cusum levels",
                              filters=filters)

        if not 'level_lengths' in self.root.events:
            self.createEArray(self.root.events,
                              'level_lengths',
                              b,
                              shape=shape,
                              title="Lengths of the cusum levels",
                              filters=filters)

        # Create/init the debug group if needed.
        if 'debug' in kargs and kargs['debug']:
            if not 'debug' in self.root:
                self.createGroup(self.root, 'debug', 'Debug')
            debug_shape = (kargs['n_channels'], kargs['n_points'])
            if not 'data' in self.root.debug:
                self.createCArray(self.root.debug,
                                  'data',
                                  a,
                                  shape=debug_shape,
                                  title="Raw data",
                                  filters=filters)

            if not 'baseline' in self.root.debug:
                self.createCArray(self.root.debug,
                                  'baseline',
                                  a,
                                  shape=debug_shape,
                                  title="Baseline data",
                                  filters=filters)

            if not 'threshold_positive' in self.root.debug:
                self.createCArray(self.root.debug,
                                  'threshold_positive',
                                  a,
                                  shape=debug_shape,
                                  title="Raw data",
                                  filters=filters)

            if not 'threshold_negative' in self.root.debug:
                self.createCArray(self.root.debug,
                                  'threshold_negative',
                                  a,
                                  shape=debug_shape,
                                  title="Raw data",
                                  filters=filters)
Example #26
def create_hdf_arrays(file_name,
                      rec_info,
                      electrode_mapping,
                      emg_mapping,
                      file_dir=None):
    '''Creates empty data arrays in hdf5 store for storage of the intan
    recording data.

    Parameters
    ----------
    file_name : str, absolute path to h5 file
    rec_info : dict
        recording info dict provided by blechpy.rawIO.read_recording_info
    electrode_mapping : pandas.DataFrame
        with columns Electrode, Port and Channels
    emg_mapping : pandas.DataFrame
        with columns EMG, Port and Channels (can be empty)
    file_dir : str (optional)
        path to recording directory if h5 is in different folder

    Throws
    ------
    ValueError
        if file_name is not absolute path to file and file_dir is not provided
    '''
    if file_dir is None:
        file_dir = os.path.dirname(file_name)

    if file_dir == '':
        raise ValueError(('Must provide absolute path to file in a recording'
                          'directory or a file_dir argument'))

    if not os.path.isabs(file_name):
        file_name = os.path.join(file_dir, file_name)

    println('Creating empty arrays in hdf5 store for raw data...')
    sys.stdout.flush()
    atom = tables.IntAtom()
    f_atom = tables.Float64Atom()
    with tables.open_file(file_name, 'r+') as hf5:

        # Create array for raw time vector
        hf5.create_earray('/raw', 'amplifier_time', f_atom, (0, ))

        # Create arrays for each electrode
        for idx, row in electrode_mapping.iterrows():
            hf5.create_earray('/raw', 'electrode%i' % row['Electrode'], atom,
                              (0, ))

        # Create arrays for raw emg (if any exist)
        if not emg_mapping.empty:
            for idx, row in emg_mapping.iterrows():
                hf5.create_earray('/raw_emg', 'emg%i' % row['EMG'], atom,
                                  (0, ))

        # Create arrays for digital inputs (if any exist)
        if rec_info.get('dig_in'):
            for x in rec_info['dig_in']:
                hf5.create_earray('/digital_in', 'dig_in_%i' % x, atom, (0, ))

        # Create arrays for digital outputs (if any exist)
        if rec_info.get('dig_out'):
            for x in rec_info['dig_out']:
                hf5.create_earray('/digital_out', 'dig_out_%i' % x, atom,
                                  (0, ))

    print('Done!')
Example #27
def generate_per_subject_cache(xls_data, test_split=0.3, validation_split=0.3):
    prevElem = None
    h5file = tb.openFile(FULL_SPECTROGRAM_BY_SUBJECT_CACHE,
                         mode='w',
                         title="All the data")
    root = h5file.root
    first = True
    X_train = None
    Y_train = None
    X_validate = None
    Y_validate = None
    X_test = None
    Y_test = None
    y_append = None
    X_append = None
    for ss_id, regions_of_interest in xls_data.items():
        avi = maybe_get_unique_avi_from_subjectState_id(ss_id)
        if avi:
            #split the subjects into validation, train and test sets
            ran_num = random.uniform(0, 1)
            if ran_num < test_split:
                X_append, y_append = X_test, Y_test
            elif ran_num < test_split + validation_split:
                X_append, y_append = X_validate, Y_validate
            else:
                X_append, y_append = X_train, Y_train
            sw = SubjectVideo(avi)
            for _, timestamp, bpm in regions_of_interest:
                #reduces the memory used - for testing
                #if random.uniform(0,1) < 0.9:
                #     continue
                timestamp = int(timestamp)
                bpm = round(float(bpm))
                try:
                    _, _, Sxx0 = sw.get_spectrogram(timestamp, 4)
                #workaround this error
                #    max_freq_idx = int((max_freqency / f[-1]) * Sxx.shape[0])
                #IndexError: index -1 is out of bounds for axis 0 with size 0
                except IndexError:
                    print("Something went wrong for avi, timestamp ", avi,
                          timestamp)
                    continue
                elem = np.array([Sxx0])
                if prevElem is not None and elem.shape != prevElem.shape:
                    print("skipping " + str(avi) + " " + ss_id +
                          " due to incorrect shape")
                    continue
                prevElem = elem
                #store the elem to disk
                if first:
                    first = False
                    a = tb.Atom.from_dtype(np.dtype('float32'))
                    data_shape = tuple([0] + list(elem.shape))
                    X_train = h5file.create_earray(root, 'X_train', a,
                                                   data_shape, "X_train")
                    X_test = h5file.create_earray(root, 'X_test', a,
                                                  data_shape, "X_test")
                    X_validate = h5file.create_earray(root, 'X_validate', a,
                                                      data_shape, "X_validate")
                    Y_train = h5file.create_earray(root, 'Y_train',
                                                   tb.IntAtom(), (0, ),
                                                   "Y_train")
                    Y_test = h5file.create_earray(root, 'Y_test', tb.IntAtom(),
                                                  (0, ), "Y_test")
                    Y_validate = h5file.create_earray(root, 'Y_validate',
                                                      tb.IntAtom(), (0, ),
                                                      "Y_validate")
                    X_append, y_append = X_test, Y_test
                X_append.append(np.array([elem]))
                y_append.append([bpm])
            h5file.flush()
            print("converted " + str(avi) + " for " + X_append.title + " set")
        else:
            print("Skipping " + ss_id + " becuase it isn't unique")
    h5file.close()
Example #28
def bpm_to_data(data, train_split=0.9):
    try:
        return readh5File(SPECTROGRAM_CACHE)
    except IOError:
        pass
    pattern = re.compile(".*vp_(\\d+)_(\\d+)_.*")
    prevElem = None
    # limit = 250
    h5file = tb.openFile(SPECTROGRAM_CACHE, mode='w', title="All the data")
    root = h5file.root
    first = True
    X_train = None
    Y_train = None
    X_test = None
    Y_test = None

    for wavFile in iterateThroughWav():
        m = pattern.match(wavFile)
        subjectId = int(m.group(1))
        stateId = int(m.group(2))
        sw = SubjectWav(wavFile)
        subjectStateId = str(subjectId) + "_" + str(stateId).zfill(2)
        try:
            for _, timestamp, bpm in data[subjectStateId]:
                #reduces the memory used
                #   if random.uniform(0,1) < 0.9:
                #       continue
                timestamp = int(timestamp)
                bpm = round(float(bpm))
                _, _, Sxx0 = sw.get_spectrogram(timestamp, 4)
                # print(Sxx0.shape) #(651,154) (801,219)
                elem = np.array([Sxx0])
                if prevElem is not None and elem.shape != prevElem.shape:
                    print("skipping " + str(wavFile) + " " + subjectStateId +
                          " due to incorrect shape")
                    continue
                prevElem = elem
                #store the elem to disk
                if first:
                    first = False
                    a = tb.Atom.from_dtype(np.dtype('float32'))
                    data_shape = tuple([0] + list(elem.shape))
                    X_train = h5file.create_earray(root, 'X_train', a,
                                                   data_shape, "X_train")
                    X_test = h5file.create_earray(root, 'X_test', a,
                                                  data_shape, "X_test")
                    Y_train = h5file.create_earray(root, 'Y_train',
                                                   tb.IntAtom(), (0, ),
                                                   "Y_train")
                    #     code.interact(local=locals())
                    Y_test = h5file.create_earray(root, 'Y_test', tb.IntAtom(),
                                                  (0, ), "Y_test")
                if random.uniform(0, 1) < 0.9:
                    X_train.append(np.array([elem]))
                    Y_train.append([bpm])
                else:
                    X_test.append(np.array([elem]))
                    Y_test.append([bpm])
            h5file.flush()
        except KeyError:
            print("can not find: " + subjectStateId + ".")
            pass
        print("converted " + str(wavFile))
    #Could not broadcast error means that not all elements of X_train have the same shape
    #usually meaning there is something wrong with the files
    h5file.close()
    #    data = (X_train, Y_train) , (X_test, Y_test)
    #    write_cache(SPECTROGRAM_CACHE,data)
    return readh5File(SPECTROGRAM_CACHE)
Example #29
def full_bpm_to_data(data, train_split=0.9):
    try:
        return readh5File(FULL_SPECTROGRAM_CACHE)
    except IOError:
        pass
    prevElem = None
    h5file = tb.openFile(FULL_SPECTROGRAM_CACHE,
                         mode='w',
                         title="All the data")
    root = h5file.root
    first = True
    X_train = None
    Y_train = None
    X_test = None
    Y_test = None
    for ss_id, regions_of_interest in data.items():
        avi = maybe_get_unique_avi_from_subjectState_id(ss_id)
        if avi:
            sw = SubjectVideo(avi)
            for _, timestamp, bpm in regions_of_interest:
                #reduces the memory used
                #   if random.uniform(0,1) < 0.9:
                #       continue
                timestamp = int(timestamp)
                bpm = round(float(bpm))
                try:
                    _, _, Sxx0 = sw.get_spectrogram(timestamp, 4)
                #workaround this error
                #    max_freq_idx = int((max_freqency / f[-1]) * Sxx.shape[0])
                #IndexError: index -1 is out of bounds for axis 0 with size 0
                except IndexError:
                    print("Something went wrong for avi, timestamp ", avi,
                          timestamp)
                    continue
                elem = np.array([Sxx0])
                if prevElem is not None and elem.shape != prevElem.shape:
                    print("skipping " + str(avi) + " " + ss_id +
                          " due to incorrect shape")
                    continue
                prevElem = elem
                #store the elem to disk
                if first:
                    first = False
                    a = tb.Atom.from_dtype(np.dtype('float32'))
                    data_shape = tuple([0] + list(elem.shape))
                    X_train = h5file.create_earray(root, 'X_train', a,
                                                   data_shape, "X_train")
                    X_test = h5file.create_earray(root, 'X_test', a,
                                                  data_shape, "X_test")
                    Y_train = h5file.create_earray(root, 'Y_train',
                                                   tb.IntAtom(), (0, ),
                                                   "Y_train")
                    Y_test = h5file.create_earray(root, 'Y_test', tb.IntAtom(),
                                                  (0, ), "Y_test")
                if random.uniform(0, 1) < train_split:
                    X_train.append(np.array([elem]))
                    Y_train.append([bpm])
                else:
                    X_test.append(np.array([elem]))
                    Y_test.append([bpm])
            h5file.flush()
            print("converted " + str(avi))
        else:
            print("Skipping " + ss_id + " becuase it isn't unique")
    h5file.close()
    return readh5File(FULL_SPECTROGRAM_CACHE)
Example #30
def train(nthreads,
          maindir,
          output,
          testartists,
          npicks,
          winsize,
          finaldim,
          trainsongs=None,
          typecompress='picks'):
    """
    Main function to do the training
    Do the main pass with the number of given threads.
    Then, reads the tmp files, creates the main output, delete the tmpfiles.
    INPUT
      - nthreads     - number of threads to use
      - maindir      - dir of the MSD, where to find song files
      - output       - main model, contains everything to perform KNN
      - testartists  - set of artists to ignore
      - npicks       - number of samples to pick per song
      - winsize      - window size (in beats) of a sample
      - finaldim     - final dimension of the sample, something like 5?
      - trainsongs   - list of songs to use for training
      - typecompress - 'picks', 'corrcoeff' or 'cov'
    RETURN
       - nothing
    """
    # sanity checks
    if os.path.isfile(output):
        print 'ERROR: file', output, 'already exists.'
        return
    # initial time
    t1 = time.time()
    # do main pass
    tmpfiles = process_filelist_train_main_pass(nthreads,
                                                maindir,
                                                testartists,
                                                npicks,
                                                winsize,
                                                finaldim,
                                                trainsongs=trainsongs,
                                                typecompress=typecompress)
    if tmpfiles is None:
        print 'Something went wrong, tmpfiles are None'
        return
    # intermediate time
    t2 = time.time()
    stimelen = str(datetime.timedelta(seconds=t2 - t1))
    print 'Main pass done after', stimelen
    sys.stdout.flush()
    # find approximate number of rows per tmpfiles
    h5 = tables.openFile(tmpfiles[0], 'r')
    nrows = h5.root.data.year.shape[0] * len(tmpfiles)
    h5.close()
    # create output
    output = tables.openFile(output, mode='a')
    group = output.createGroup("/", 'data',
                               'KNN MODEL FILE FOR YEAR RECOGNITION')
    output.createEArray(group,
                        'feats',
                        tables.Float64Atom(shape=()), (0, finaldim),
                        'feats',
                        expectedrows=nrows)
    output.createEArray(group,
                        'year',
                        tables.IntAtom(shape=()), (0, ),
                        'year',
                        expectedrows=nrows)
    output.createEArray(group,
                        'track_id',
                        tables.StringAtom(18, shape=()), (0, ),
                        'track_id',
                        expectedrows=nrows)
    # aggregate temp files
    for tmpf in tmpfiles:
        h5 = tables.openFile(tmpf)
        output.root.data.year.append(h5.root.data.year[:])
        output.root.data.track_id.append(h5.root.data.track_id[:])
        output.root.data.feats.append(h5.root.data.feats[:])
        h5.close()
        # delete tmp file
        os.remove(tmpf)
    # close output
    output.close()
    # final time
    t3 = time.time()
    stimelen = str(datetime.timedelta(seconds=t3 - t1))
    print 'Whole training done after', stimelen
    # done
    return