if not os.path.exists(os.path.join(outputFolder,e)):
    os.mkdir(os.path.join(outputFolder,e))
  #similar to above, make subfolder if doesn't exist

  n = 'rooktemp'
  w = ['0193']
  b = 'Mirror'
  theta = 0.7
  alphaSpatial = '[0.3-0.0]'
  alphaSpatiotemp = '0.3-0.4]'
  alphaNonSpatial = '[0.0-0.0]'
  c_0 = cref[e]
  eventClass = e
  dataset = d
  print eventClass,dataset
  headers,matches = SOLARGenImageList.image_event_matches(dataset=d,waves = w)
  print 'matches calculated'
  treefileSpatial = "results/"+"_".join([str(e),str(n),str(d),"-".join(w),'c_0='+str(c_0),'balance='+str(b),'alpha='+str(alphaSpatial)])+".tree"
  treefileNonSpatial = "results/"+"_".join([str(e),str(n),str(d),"-".join(w),'c_0='+str(c_0),'balance='+str(b),'alpha='+str(alphaNonSpatial)])+".tree"
  treeSpatial = TreeNode('dummy')
  treeNonSpatial = TreeNode('dummy')
  with open(treefileSpatial) as f:
    treeSpatial.load(f)
  with open(treefileNonSpatial) as f:
    treeNonSpatial.load(f)
  S_train,S_test, = read_data(e,n,d,w,b) # read the data set
  cells_train, adj = S_train
  cells_test, adj_test = S_test
  counter = 0
  for x in sorted(matches.keys()): # for each image of the data set
    paramsFilename = x[0]
Ejemplo n.º 2
0
import itertools
import os
import random
import shutil

import SOLARGenImageList
random.seed(42)

# This script copies the thumbnail images of the specified datasets from their
# stored location on CBSIR to a local folder.

#events = ['SS','FL','CH','AR','SG','FI']
events = ['AR']
datasets = ['1WEEK']
radius = 1625  # not used by this script
output_dir = 'images'

# shutil.copyfile does not create missing directories, so make sure the
# destination folder exists before the first copy.
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

for e, d in itertools.product(events, datasets):
    eventClass = e
    dataset = d
    # A single parenthesised argument prints identically as a Python 2 print
    # statement and as a Python 3 print() call.
    print('%s %s' % (eventClass, dataset))
    headers, matches = SOLARGenImageList.image_event_matches(
        [eventClass], dataset=dataset, waves=['0171'])
    print('matches calculated')
    for x in matches:
        # The first element of each key is the path of the image's parameter
        # file; the thumbnail path is that path with its last four characters
        # replaced by '_th.png'.
        paramsFilename = x[0]
        imageFilename = paramsFilename[:-4] + '_th.png'
        # Prefix the copy with the event class so that copies made for
        # different classes do not overwrite each other.
        shutil.copyfile(
            imageFilename,
            os.path.join(output_dir, e + '_' + os.path.basename(imageFilename)))
#  for imageKey in matches[0]:                                # for every image
#    eventBinMat = np.zeros([64,64],dtype = bool)
#    for E in matches[imageKey]:                           # for each event in that image
#      cc = pt.parseChainCode(E,headers)               
#      if cc == "NA":                                  
#        cc = pt.parseBoundingBox(E,headers)           
#      eventBinMat = np.logical_or(eventBinMat, stuff.find_grid_cells(cc)) # combine all the events into one binary matrix for the image


Ejemplo n.º 3
0
_GRID_SIZE = 64  # the image grid is _GRID_SIZE x _GRID_SIZE cells
_TRAIN_MIN_ROW = 32  # rows >= this are training cells, rows below are test cells
_NUM_PARAMS = 10  # parameters P1..P10 are stored per cell

# (row offset, col offset) of the spatial neighbors, in the order they are tried.
_ROOK_OFFSETS = ((-1, 0), (0, -1), (0, 1), (1, 0))
_QUEEN_OFFSETS = ((-1, -1), (-1, 0), (-1, 1),
                  (0, -1), (0, 1),
                  (1, -1), (1, 0), (1, 1))

# neighborhood name -> (spatial offsets, image-number offsets of temporal neighbors)
_NEIGHBORHOODS = {
    'rook': (_ROOK_OFFSETS, ()),
    'rooktemp': (_ROOK_OFFSETS, (1, -1)),
    'rooktemplong': (_ROOK_OFFSETS, (1, -1, 2, -2, 3, -3)),
    'queen': (_QUEEN_OFFSETS, ()),
}


def _neighbor_ids(neighborhood, pix_id, imagefile_tracker):
    """Return the candidate neighbor ids of the cell ``pix_id``.

    Candidates may refer to cells that do not exist (outside the grid, masked
    out, or in a missing image); the caller is expected to skip those.

    Raises:
        Exception: if ``neighborhood`` is not a supported option.
    """
    if neighborhood not in _NEIGHBORHOODS:
        raise Exception('neighborhood option not supported')
    spatial_offsets, temporal_offsets = _NEIGHBORHOODS[neighborhood]
    im_num, row, col, im_file = pix_id
    ids = [(im_num, row + d_row, col + d_col, im_file)
           for d_row, d_col in spatial_offsets]
    # Temporal neighbors: same grid position in another image of the sequence.
    # get_image_file is defined elsewhere; presumably it maps an image number
    # to that image's key -- TODO confirm.
    ids.extend((im_num + d_im, row, col,
                get_image_file(imagefile_tracker, im_num + d_im))
               for d_im in temporal_offsets)
    return ids


def _image_masks(events, headers, balance_option, grid_cells):
    """Return ``(event_binmat, keep_mask)`` for the events of one image.

    ``event_binmat`` marks the grid cells covered by any event. ``keep_mask``
    marks the cells that survive balancing (it is only consulted for training
    rows by the caller).
    """
    shape = [_GRID_SIZE, _GRID_SIZE]
    event_binmat = np.zeros(shape, dtype=bool)
    if balance_option == 'None':
        # No balancing: keep every cell. (Previously this all-True filter was
        # overwritten per image, so 'None' silently dropped most cells.)
        keep_mask = np.ones(shape, dtype=bool)
    else:
        keep_mask = np.zeros(shape, dtype=bool)

    for event in events:
        cc = parseChainCode(event, headers)
        if cc == "NA":  # no chain code available, fall back to the bounding box
            cc = parseBoundingBox(event, headers)
        event_location_mat = find_grid_cells(cc)
        # combine all the events into one binary matrix for the image
        event_binmat = np.logical_or(event_binmat, event_location_mat)
        # the buffered event region is always kept (positive examples)
        buffered = buffer_binmat(event_location_mat)
        keep_mask = np.logical_or(keep_mask, buffered)
        # the balancing options below add some negative examples
        if balance_option == 'Mirror':
            keep_mask = np.logical_or(keep_mask, mirror_event(buffered))
        if balance_option == 'Duplication':
            keep_mask = np.logical_or(keep_mask, reposition_event(buffered))

    if balance_option == 'Random':  # randomly undersample the negative class
        num_pos = int(np.sum(event_binmat))
        negatives = [rc for rc in grid_cells if not event_binmat[rc[0], rc[1]]]
        # random.sample raises ValueError when asked for more items than the
        # population holds, which happens once events cover over half the grid.
        # NOTE(review): candidates are drawn from the whole grid, not only from
        # cells inside circle_mask -- confirm that this is intended.
        for rc in random.sample(negatives, min(num_pos, len(negatives))):
            keep_mask[rc] = True
    return event_binmat, keep_mask


def _make_cell_record(cell_id, label, cell):
    """Build the dict describing one cell from a row of the parameter file.

    Columns 2..11 of ``cell`` are stored as floats under 'P1'..'P10'.
    """
    record = {'id': cell_id, 'class': label}
    for k in range(_NUM_PARAMS):
        record['P%d' % (k + 1)] = float(cell[k + 2])
    return record


def _generate_cells(circle_mask, event_class, dataset, waves, balance_option):
    """Build the balanced list of cell records from the raw parameter files."""
    headers, matches = SOLARGenImageList.image_event_matches(
        [event_class], dataset=dataset, waves=waves)
    print('matches calculated %s %s %s' % (event_class, dataset, waves))
    # every (row, col) of the grid, used for picking random negative cells
    grid_cells = list(itertools.product(range(_GRID_SIZE), range(_GRID_SIZE)))

    all_pixels = []
    for image_counter, imagekey in enumerate(sorted(matches)):
        event_binmat, keep_mask = _image_masks(
            matches[imagekey], headers, balance_option, grid_cells)
        with open(imagekey[0]) as f:  # read the parameter data for this image
            cells = list(csv.reader(f, dialect='excel-tab'))
        for cell in cells:
            row = int(cell[0]) - 1  # the file uses 1-based indices
            col = int(cell[1]) - 1
            if not circle_mask[row, col]:
                continue  # cell is off the disk
            # Balancing only thins out the training rows; test rows are
            # always kept.
            if not (keep_mask[row, col] or row < _TRAIN_MIN_ROW):
                continue
            label = event_class if event_binmat[row, col] else 'null'
            all_pixels.append(
                _make_cell_record((image_counter, row, col, imagekey), label, cell))
    return all_pixels


def _build_adjacency(all_pixels, cell_tracker, imagefile_tracker, neighborhood):
    """Return ``(homogeneous, inhomogeneous)`` neighbor dicts keyed by cell id.

    Each value is a list of neighbor cell records (the actual objects, not
    their ids); a neighbor is homogeneous when it has the same class as the cell.
    """
    homogeneous = dict()
    inhomogeneous = dict()
    for pix in all_pixels:
        same_class = []
        other_class = []
        for neighbor_id in _neighbor_ids(neighborhood, pix['id'], imagefile_tracker):
            try:
                neighbor = cell_tracker[neighbor_id]
            except KeyError:
                # expected: the neighbor was removed by the mask or the
                # balancing filter, or lies outside the grid / image sequence
                continue
            if neighbor['class'] == pix['class']:
                same_class.append(neighbor)
            else:
                other_class.append(neighbor)
        homogeneous[pix['id']] = same_class
        inhomogeneous[pix['id']] = other_class
    return homogeneous, inhomogeneous


def write_experiment_data(circle_mask, event_class, neighborhood, dataset, waves, balance_option):
    """Generate and pickle the train/test cell sets for one experiment setup.

    Two files are written under ``data/``, named after the arguments and ending
    in ``.train`` and ``.test``. Each holds a pickled tuple
    ``(pixels, adj_h, adj_i)``: the list of cell records and two dicts mapping
    a cell id to its same-class / other-class neighbor records. Cells with
    row >= 32 form the training set, the others the test set, and neighbor
    lists never cross that boundary. Nothing is done when both files exist.

    Args:
        circle_mask: 2-D array indexed ``[row, col]``; falsy entries mark cells
            off the disk, which are skipped.
        event_class: event class label given to cells covered by an event.
        neighborhood: 'rook', 'rooktemp', 'rooktemplong' or 'queen'.
        dataset: dataset name passed on to the data helpers.
        waves: sequence of wavelength strings.
        balance_option: 'None' keeps every cell; 'Mirror', 'Duplication' and
            'Random' keep the buffered event cells plus some negative cells;
            any other value keeps only the buffered event cells. Applies to
            training rows only.

    Raises:
        Exception: if ``neighborhood`` is not supported (raised once the first
            cell is processed).
    """
    stem = "data/" + "_".join(
        [event_class, neighborhood, dataset, "-".join(waves), 'balance=' + balance_option])
    output_file_train = stem + ".train"
    output_file_test = stem + ".test"
    if os.path.exists(output_file_train) and os.path.exists(output_file_test):
        # files we would write to already exist
        print('already generated %s %s %s' % (event_class, neighborhood, dataset))
        return

    print('%s %s %s %s %s' % (event_class, neighborhood, dataset, waves, balance_option))
    all_pixels = read_cell_data(event_class, dataset, waves, balance_option)
    if all_pixels != -1:  # cached cell data was found
        print('read data successfully')
    else:  # read_cell_data signals failure with -1: build the cells and cache them
        all_pixels = _generate_cells(circle_mask, event_class, dataset, waves, balance_option)
        write_cell_data(event_class, dataset, waves, balance_option, all_pixels)

    cell_tracker = dict((x['id'], x) for x in all_pixels)
    imagefile_tracker = dict((x['id'][0], x['id'][3]) for x in all_pixels)

    # Neighbors are assigned only now, after every cell exists, so that each
    # neighbor id can be resolved to its record.
    full_h_adj, full_i_adj = _build_adjacency(
        all_pixels, cell_tracker, imagefile_tracker, neighborhood)

    # Possibly a leftover from when train/test were split randomly; kept
    # because it determines the order of the pickled cell lists.
    # TODO: figure out if the shuffle can be removed
    random.shuffle(all_pixels)
    train_pixels = [x for x in all_pixels if x['id'][1] >= _TRAIN_MIN_ROW]
    test_pixels = [x for x in all_pixels if x['id'][1] < _TRAIN_MIN_ROW]

    train_ids = set(x['id'] for x in train_pixels)
    test_ids = set(x['id'] for x in test_pixels)
    train_adj_h = dict()
    train_adj_i = dict()
    test_adj_h = dict()
    test_adj_i = dict()
    for key in full_h_adj:  # both adjacency dicts have the same keys
        if key in train_ids:
            members, adj_h, adj_i = train_ids, train_adj_h, train_adj_i
        else:
            members, adj_h, adj_i = test_ids, test_adj_h, test_adj_i
        # drop neighbors that ended up in the other set
        adj_h[key] = [x for x in full_h_adj[key] if x['id'] in members]
        adj_i[key] = [x for x in full_i_adj[key] if x['id'] in members]

    s_train = train_pixels, train_adj_h, train_adj_i
    s_test = test_pixels, test_adj_h, test_adj_i
    with open(output_file_train, 'wb') as f:
        pickle.dump(s_train, f)
    with open(output_file_test, 'wb') as f:
        pickle.dump(s_test, f)