def separate_validation_set(data_dir, num_per_class=384, ratio=1):
  '''For comparing the impact of different imbalance ratios: extracts
  a perfectly balanced validation set from a sequence of nets whose
  training sets are related by strict inclusion. '''

  if os.path.exists(ojoin(data_dir,'validation')):
    shutil.rmtree(ojoin(data_dir,'validation'))
  nets = [net for net in os.listdir(data_dir) 
          if net.startswith('net_')]
  os.mkdir(ojoin(data_dir,'validation'))

  min_ratio_net_dir = ojoin(data_dir, 'net_'+str(len(nets)-1))
  print 'min_ratio_net_dir: %s'%('net_'+str(len(nets)-1))

  removed = extract_validation_set(min_ratio_net_dir, 
                                  num_per_class, ratio)

  for net in nets:
    if net == 'net_'+str(len(nets)-1): continue  # skip the min-ratio net; listdir order is arbitrary
    remove_imgs(ojoin(data_dir,net), removed)

  print "Done. Now on each graphic machine, you need to:"
  print "  1) run batching on validation here on graphic02. "
  print "  2) scp the validation-batch dir and a net-raw dir from graphic02"
  print "  3) run batching on net dir"
  print "  4) copy validation-batch dir to each remote net-batch dir"
  print "  5) copy validation batches to net dir, but changing batch numbers such that they follow from the max batch in net dir. NOTE: you have a script for this :) merge_validation_batches()"
def imbalance_experiment(data_dir, min_ratio, max_ratio, num_nets):
  ''' given a data directory containing subdirs to each class, a range
  of imbalance ratios to cover, and a number of nets to train, creates
  num_nets directories, each holding a subdir for each class, with 
  max_ratio as imbalance for net_0, ..., min_ratio as imbalance for 
  net_num_nets. '''

  if min_ratio < 1 or max_ratio < 1: 
    print 'Error: ratios must be >= 1.'
    sys.exit(1)

  # using the log rules, compute the common ratio of the geometric
  # sequence giving the imbalance ratio to target for each net.
  step = compute_step(min_ratio, max_ratio, num_nets)

  # move contents of data_dir to a new subdir, 'all'
  if os.path.isdir(ojoin(data_dir,'all')):
    shutil.rmtree(ojoin(data_dir,'all'))
  all_names = os.listdir(data_dir)
  os.mkdir(ojoin(data_dir,'all'))  # must exist before moving into it
  for name in all_names:
    shutil.move(ojoin(data_dir,name), ojoin(data_dir,'all',name))

  # recursively make subdirs for each net, preserving strict set 
  # inclusion from net[i] to net[i+1]
  nets = ['all'] + ['net_'+str(i) for i in range(num_nets)]
  random_delete_recursive(data_dir, step, nets, ratio=max_ratio, i=0)
  print 'NOTE: net_0 has highest imbalance ratio.'
def random_delete_aux(data_dir, ratio):
  ''' randomly deletes as few images from outnumbering class dirs as
      possible such that #biggest/#smallest == ratio. '''

  data_dir = os.path.abspath(data_dir)
  dump = raw_input('Do you want a json dump in %s of which files were randomly deleted?(Y/any) '%(data_dir))
    
  # D is for dict, d is for directory
  D = {}
  os.chdir(data_dir)
  dirs = [d for d in os.listdir(data_dir) if os.path.isdir(ojoin(data_dir,d))]
  
  print 'the directories are: %s'%(dirs)

  for d in dirs:
    D[d] = {}
    D[d]['total'] = len(os.listdir(ojoin(data_dir,d)))

  dirs = [(d,D[d]['total']) for d in D.keys()]
  dirs = sorted(dirs, key = lambda x: x[1])

  print '%s is smallest class with %i images'%(dirs[0][0],dirs[0][1])
  for d in D.keys():
    D[d]['remove'] = max(0,int(D[d]['total']-(ratio*dirs[0][1])))
    print '%s has %i images so %i will be randomly removed'%(d, D[d]['total'], D[d]['remove'])
    if D[d]['remove'] > 0 :
      D = random_delete_aux2(data_dir,d,D)

  if dump == 'Y': json.dump(D, open(data_dir+'/random_remove_dict.txt','w'))
  return D
def random_delete_aux2(data_dir,d,D,delete_hard=False):
  D[d]['deleted'] = random.sample(os.listdir(ojoin(data_dir,d)),D[d]['remove'])
  print 'successfully condemned images from %s'%(d)
  back = os.getcwd()
  os.chdir(ojoin(data_dir,d))
  for link in D[d]['deleted']: os.remove(link)
  os.chdir(back)
  return D
def random_delete_recursive(data_dir, step, nets, ratio, i):
  if os.path.isdir(ojoin(data_dir,nets[i+1])):
    shutil.rmtree(ojoin(data_dir,nets[i+1]))
  shutil.copytree(ojoin(data_dir, nets[i]), 
                  ojoin(data_dir, nets[i+1]), symlinks=True)
  random_delete_aux(ojoin(data_dir, nets[i+1]), ratio)
  if i + 2 < len(nets):
    random_delete_recursive(data_dir, step, nets, float(ratio)/step, i+1)
Example 6
def convert_bboxes(self, p, img_id):
    im = Image.open(ojoin(p, '{}.jpg'.format(img_id)))
    bboxes = self.read_csv(ojoin(p, '{}.csv'.format(img_id)))
    new_bbox = []
    for bbox in bboxes:
        bbox = self.convert(im.size, bbox)
        # Add a zero for the class. Only 1 here
        new_bbox.append([0] + bbox)
    new_bbox = pd.DataFrame(new_bbox)
    # Dump the file to a csv file with a space separator
    fname = ojoin(p, 'labels', '{}.txt'.format(img_id))
    new_bbox.to_csv(fname, index=None, header=None, sep=" ")
def extract_validation_set(net_dir, num_per_class, ratio):
  '''randomly move num_per_class images out of each dir, and into a
  new sidealong dir called validation. '''
  classes = os.listdir(net_dir)
  print 'going to extract %i images from: %s'%(num_per_class,classes)
  d = {}
  for c in classes:
    os.mkdir(ojoin(net_dir, '..', 'validation',c))
    d[c] = random.sample(os.listdir(ojoin(net_dir,c)), num_per_class)
    for fname in d[c]:
      shutil.move(ojoin(net_dir, c, fname),
                  ojoin(net_dir, '..', 'validation',c))
  return d
Example 8
def dump_to_files(Keep, dump, data_info):
  if os.path.exists(data_info): rmtree(data_info)
  os.mkdir(data_info)
  dump_fnames = ['train.txt','val.txt','test.txt']
  for i in xrange(3):
    dfile = open(ojoin(data_info,dump_fnames[i]),'w')
    dfile.writelines(["%s %i\n" % (f,num) for (f,num) in dump[i]])
    dfile.close()
    
  # write to read file how to interpret values as classes      
  read_file = open(ojoin(data_info,'read.txt'), 'w')    
  read_file.writelines(["%i %s\n" % (num,label) for (num, label)
                         in enumerate(Keep.keys())])
  read_file.close()
def get_train_file(model_dir):
  train_file = ''
  for fname in os.listdir(model_dir):
    if fname.endswith('train.prototxt'):
      return open(ojoin(model_dir,fname),'r')
  print 'no train prototxt found'
  sys.exit()
Example 10
def main():
    config_file = ojoin(CFG_PATH, CFG_FILE)
    if SHOW:
        subs = get_avail_subs(config_file)
        print("Following subscriptions are availabe in your config:")
        print("\n".join(subs))
    elif ADD_SUB:
        print("New configuration entry will be created.")
        print("Please provide appropriate information:")
        sub_name = input("Subscription name: ")
        sub_id = input("Subscription ID: ")
        tenant_id = input("Tenant ID: ")
        client_id = input("Client ID: ")
        client_secret = getpass.getpass("Client Secret: ")
        new_cfg = add_sub_cfg(sub_name, tenant_id, sub_id, client_id,
                              client_secret, config_file)
        with open(config_file, 'w') as fh:
            json.dump(new_cfg, fh, indent=2)
    elif DEL_SUB:
        new_cfg = del_sub_cfg(DEL_SUB, config_file)
        with open(config_file, 'w') as fh:
            json.dump(new_cfg, fh, indent=2)

        print(f"{DEL_SUB} deleted.")
    else:
        variables = get_sub_secrets(SUBS, config_file)
        if variables:
            print(exp_templ.format(**variables))
        else:
            print(f"Subscription {SUBS} does not exist in config file.")
            sys.exit(1)
Example 11
class Friends(BaseHandler):
    '''Handles the list of friends'''
    url = ojoin(Main.url, "friends")

    @web.authenticated
    def get(self):
        self.render('friendlist.html')
Example 12
def test_wellintersections_tvdrange_wfilter(loadwells1):
    """Find well crossing using coarser sampling to Fence, with
    wfilter settings.
    """

    wfilter = {
        "parallel": {
            "xtol": 4.0,
            "ytol": 4.0,
            "ztol": 2.0,
            "itol": 10,
            "atol": 5.0
        }
    }

    mywell_list = loadwells1

    mywells = Wells()
    mywells.wells = mywell_list
    print("Limit TVD and downsample...")
    mywells.limit_tvd(1300, 1400)
    mywells.downsample(interval=6)
    print("Limit TVD and downsample...DONE")

    dfr = mywells.wellintersections(wfilter=wfilter)
    dfr.to_csv(ojoin(td, "wells_crossings_filter.csv"))
    print(dfr)
Example 13
def test_wellintersections_tvdrange_wfilter(loadwells1):
    """Find well crossing using coarser sampling to Fence, with
    wfilter settings.
    """

    wfilter = {
        'parallel': {
            'xtol': 4.0,
            'ytol': 4.0,
            'ztol': 2.0,
            'itol': 10,
            'atol': 5.0
        }
    }

    mywell_list = loadwells1

    mywells = Wells()
    mywells.wells = mywell_list
    print('Limit TVD and downsample...')
    mywells.limit_tvd(1300, 1400)
    mywells.downsample(interval=6)
    print('Limit TVD and downsample...DONE')

    dfr = mywells.wellintersections(wfilter=wfilter)
    dfr.to_csv(ojoin(td, 'wells_crossings_filter.csv'))
    print(dfr)
Example 14
class DeleteTask(BaseHandler):

    url = ojoin(Task.url, "delete")

    @web.authenticated
    def get(self, task_id):
        self.redirect(Tasks)

    @web.authenticated
    @rollback_on_failure
    def post(self, task_id):
        if self.get_argument('delete', 'false') == 'true':
            task = self.session.query(
                orm.Task).filter(orm.Task.task_id == task_id).one()
            if len(task.users) > 1 and self.current_user in task.users:
                task.users.remove(self.current_user)
                self.current_user.notify(
                    'message',
                    "You have been removed from the task '{.name}'".format(
                        task))
            elif len(task.users) == 1 and self.current_user in task.users:
                self.session.delete(task)
                self.current_user.notify(
                    'message',
                    "The task '{.name}' has been deleted.".format(task))
            self.session.commit()
        else:
            print("Didn't get the expected argument delete=true. Hacking?")
        self.redirect(Tasks)
Example 15
class Logout(BaseHandler):
    url = ojoin(Main.url, 'logout')

    @web.authenticated
    def get(self):
        self.clear_cookie('user')
        self.redirect(Login)
Example 16
class ShareTask(BaseHandler):
    r'''Handles sharing tasks'''

    url = ojoin(Task.url, "share")

    @web.authenticated
    @rollback_on_failure
    def post(self, task_id):
        try:
            task = self.session.query(
                orm.Task).filter_by(task_id=task_id).one()
            if task not in self.current_user.tasks:
                raise Exception('User does not own this task')
            email = self.get_argument('friend', None)
            if email is None:
                raise Exception('Email argument not given')
            friend = self.session.query(orm.User).filter_by(email=email).one()
            if friend not in self.current_user.friends:
                raise Exception(
                    'Cannot share a task with someone who is not a friend.')
            friend.share_task(task=task, sharer=self.current_user)
            self.session.commit()
        except Exception as e:
            print(str(e))
        finally:
            self.redirect(Tasks)
def rename_classes(to_dir, labels):
  ''' once move_to_dirs is done, may wish to rename classes (eg so 
  they can fit in preds). '''
  more = 'Y'
  while more == 'Y':
    if raw_input('Rename (another) class? (Y/N) ') == 'Y':
      rename = [-1]
      while not all([idx in range(len(labels)) for idx in rename]):
        for elem in enumerate(labels): print elem
        rename = [int(elem) for elem in raw_input("Name a class number from above: ").split()]
      new_name = raw_input('Rename to: ')
      os.rename(ojoin(to_dir,labels[rename[0]]), 
                ojoin(to_dir,new_name))
      labels = update_labels(labels, rename, new_name)
    else: more = 'N'
  return labels
Example 18
class NewTask(BaseHandler):
    r'''Allows creating a new task'''

    url = ojoin(Tasks.url, 'new')

    def get(self):
        self.render('newtask.html')
Example 19
class Tasks(BaseHandler):
    '''Allows creation of tasks'''

    url = ojoin(Main.url, "tasks")

    @web.authenticated
    def get(self):
        '''Renders the task list'''
        self.render('tasklist.html')

    @web.authenticated
    @rollback_on_failure
    def post(self):
        '''Adds a task to the current user'''
        t = orm.Task(
            self.get_argument('taskname'), int(self.get_argument('length')),
            self.get_argument('firstdue'),
            self.get_argument('allowearly', 'off') == 'on',
            int(self.get_argument('points', 100)),
            int(self.get_argument('decay_length',
                                  self.get_argument('length'))),
            set(self.get_argument('tags', '').replace(',', ' ').split()),
            self.get_argument('notes', None))
        t.user_email = self.current_user.email
        self.current_user.tasks.append(t)
        self.session.commit()
        self.redirect(Tasks)
Example 20
def test_quickplot_wells(loadwells1):
    """Import wells from file to Wells and quick plot."""

    mywell_list = loadwells1

    mywells = Wells()
    mywells.wells = mywell_list
    mywells.quickplot(filename=ojoin(td, "quickwells.png"))
Example 21
class Notifications(BaseHandler):
    '''Displays all notifications for a user'''

    url = ojoin(Main.url, 'notifications')

    @web.authenticated
    def get(self):
        self.render('notifications.html')
def write_content_to_deploy_file(model_dir, content):
  model_name = model_dir.split('/')[-1]
  model_name = model_name.split('-fine')[0]
  fname = ojoin(model_dir,model_name+'_deploy.prototxt')
  # print "fname: %s"%(fname)
  deploy_file = open(fname,'w')
  deploy_file.writelines(content)
  deploy_file.close()
Example 24
def symlink_dataset(Keep, from_dir, to_dir):
  dump = []
  part = [0, 0.8, 0.87, 1] # cumulative split points: 80% train, 7% val, 13% test
  if os.path.isdir(to_dir): rmtree(to_dir)
  os.mkdir(to_dir)
  for i in xrange(3):
    dump.append([])
    for [num,key] in enumerate(Keep.keys()):
      l = len(Keep[key])
      dump[i] += [[f,num] for f in
                  Keep[key][int(part[i]*l):int(part[i+1]*l)]]
    random.shuffle(dump[i])
  
  # cross_val = [np.array(d, dtype=[('x',object),('y',int)])
  #              for d in dump]
  for d,dname in zip(dump,['train','val','test']):
    data_dst_dir = ojoin(to_dir,dname)
    os.mkdir(data_dst_dir)
    for i in xrange(len(d)):
      if os.path.islink(ojoin(data_dst_dir,d[i][0])):
        old = d[i][0]
        while os.path.islink(ojoin(data_dst_dir,d[i][0])):
          print '%s symlinked already, creating duplicate'%(d[i][0])
          d[i][0] = d[i][0].split('.')[0]+'_.jpg'
        os.symlink(ojoin(from_dir,old),
                   ojoin(data_dst_dir,d[i][0]))
      else: os.symlink(ojoin(from_dir,d[i][0]),
                       ojoin(data_dst_dir,d[i][0]))
  return dump
def get_train_file(model_dir):
  train_file = ''
  for fname in os.listdir(model_dir):
    if fname.endswith('train.prototxt'):
      return open(ojoin(model_dir,fname),'r')
  if not os.path.isdir(model_dir):
    print "error:", model_dir, "does not exist"
  else: print 'no train prototxt found in', model_dir
  sys.exit()
Example 26
def __init__(self, feature_path, split, batch_size=None, seed=None):
    self.feature_path = feature_path
    self.split = split
    self.hf = ojoin(feature_path, 'feature.h5')
    config = json.load(
        open(ojoin(feature_path, 'feature_config.json'), 'r'))
    self.__dict__.update(config)
    if batch_size is not None:
        self.batch_size = batch_size
    self.nb_sample = self.get_nsample(split)
    if self.batch_size > self.nb_sample:
        self.batch_size = self.nb_sample
        print('Set batch_size to {}'.format(self.nb_sample))
    # DONT CHANGE shuffle - reason in the docstring
    super(H5FeatureIterator, self).__init__(self.nb_sample,
                                            batch_size=self.batch_size,
                                            shuffle=False,
                                            seed=seed)
def get_train_file(model_dir):
    train_file = ''
    for fname in os.listdir(model_dir):
        if fname.endswith('train.prototxt'):
            return open(ojoin(model_dir, fname), 'r')
    if not os.path.isdir(model_dir):
        print "error:", model_dir, "does not exist"
    else:
        print 'no train prototxt found in', model_dir
    sys.exit()
Example 28
def dump_to_files(Keep, data_info, task, data_dir):
  ''' This function "trusts" you. It will overwrite data lookup 
  files. '''
  dump = []
  part = [0, 0.82, 0.89, 1] # cumulative split points: 82% train, 7% val, 11% test
  dump_fnames = ['train.txt','val.txt','test.txt']
  for i in xrange(3):
    dump.append([])
    for [key,num] in [('Default',0),(task,1)]:
      l = len(Keep[key])
      dump[i] += [[f,num] for f in
                  Keep[key][int(part[i]*l):int(part[i+1]*l)]]
    # this is the important shuffle actually
    random.shuffle(dump[i])
    if os.path.isfile(ojoin(data_info,dump_fnames[i])):
      print "WARNING: overwriting", ojoin(data_info,dump_fnames[i])
    with open(ojoin(data_info,dump_fnames[i]),'w') as dfile:
      dfile.writelines(["%s %i\n" % (ojoin(data_dir,f),num)
                        for (f,num) in dump[i]])
def merge_validation_batches(data_dir):
  ''' assuming validation-batches dir is in the net-batches dir, moves
  contents of former into latter, but changing names of batches so
  that batch numbers follow sequentially and validation batch nums are
  highest. '''
  names = os.listdir(data_dir)
  train_batches = [name for name in names if name.startswith('data_batch_')]
  names = os.listdir(ojoin(data_dir, 'validation'))
  valid_batches = [name for name in names if name.startswith('data_batch_')]
  maxx = len(train_batches)

  for (i,batch) in enumerate(valid_batches):
    shutil.move(ojoin(data_dir,'validation',batch),
                ojoin(data_dir,'data_batch_'+str(maxx+i+1)))

  shutil.rmtree(ojoin(data_dir,'validation'))

  print 'validation batches start at data_batch_%i'%(maxx+1)
  print 'WARNING: the validation batches.meta is thrown away, so the mean subtracted at validation time is the training-set mean, not the validation-set mean. This might slightly harm measured validation performance; apart from that there should be no problem.'
Example 30
def run_model(data_path, out_path):

    rules, hard_rules, _, atoms = ground(data_path)
    results = map_inference(rules, hard_rules)

    reviews = atoms['review']
    with open(ojoin(out_path, 'POSITIVEREVIEW.txt'), 'w') as f:
        for (review, paper), (vid, _) in reviews.items():
            print("'%s'\t'%s'\t%f" % (review, paper, results[vid]), file=f)

    acceptable = atoms['acceptable']
    with open(ojoin(out_path, 'ACCEPTABLE.txt'), 'w') as f:
        for paper, (vid, _) in acceptable.items():
            print("'%s'\t%f" % (paper, results[vid]), file=f)

    presents = atoms['presents']
    with open(ojoin(out_path, 'PRESENTS.txt'), 'w') as f:
        for author, (vid, _) in presents.items():
            print("'%s'\t%f" % (author, results[vid]), file=f)
Example 31
def test_wellintersections(loadwells1):
    """Find well crossing"""

    mywell_list = loadwells1

    mywells = Wells()
    mywells.wells = mywell_list
    dfr = mywells.wellintersections()
    logger.info(dfr)
    dfr.to_csv(ojoin(td, "wells_crossings.csv"))
Example 32
def dump_to_files(Keep, data_info, task, data_dir):
    ''' This function "trusts" you. It will overwrite data lookup 
    files. '''
    dump = []
    part = [0, 0.82, 0.89, 1]  # partition into train val test
    dump_fnames = ['train.txt', 'val.txt', 'test.txt']
    for i in xrange(3):
        dump.append([])
        for [key, num] in [('Default', 0), (task, 1)]:
            l = len(Keep[key])
            dump[i] += [[f, num]
                        for f in Keep[key][int(part[i] * l):int(part[i + 1] *
                                                                l)]]
        # this is the important shuffle actually
        random.shuffle(dump[i])
        if os.path.isfile(ojoin(data_info, dump_fnames[i])):
            print "WARNING: overwriting", ojoin(data_info, dump_fnames[i])
        with open(ojoin(data_info, dump_fnames[i]), 'w') as dfile:
            dfile.writelines([
                "%s %i\n" % (ojoin(data_dir, f), num) for (f, num) in dump[i]
            ])
Example 33
class EditTasks(BaseHandler):
    r'''Lists tasks to be edited'''

    url = ojoin(Tasks.url, "edit")

    @web.authenticated
    @rollback_on_failure
    def get(self):
        '''Shows the task edit selection screen'''
        try:
            self.render('edittasks.html')
        except Exception as e:
            print(str(e))
Example 34
    def create_annotations(self):
        '''
        Create the annotation for yolo
        '''
        for split in ['train', 'validation', 'test']:
            print(split)
            p = ojoin(self.data_folder, split)
            if not os.path.isdir(ojoin(p, 'labels')):
                os.mkdir(ojoin(p, 'labels'))

            img_ids = [
                os.path.splitext(f)[0] for f in os.listdir(p)
                if f.endswith('.jpg')
            ]
            path_imgs = []
            for img_id in tqdm(img_ids):
                path_imgs.append(ojoin(p, '{}.jpg'.format(img_id)))
                self.convert_bboxes(p, img_id)

            data = pd.DataFrame(path_imgs)
            data.to_csv(ojoin(self.data_folder, '{}.txt'.format(split)),
                        index=None)
Example 35
def trans_dir_xlsx(in_file_dir):
    from os import listdir
    from os.path import join as ojoin
    file_pathli=[]
    for f in listdir(in_file_dir):
        file_pathli.append(ojoin(in_file_dir,f))
    from threading import Thread
    thread_list=[]
    for f in file_pathli:
        t=Thread(target=trans_to_xlsx,args=(f,f))
        thread_list.append(t)  # bug fix: threads were never added to the list
    for t in thread_list:
        t.start()
    for t in thread_list:
        t.join()
Example 36
def get_label_dict_knowing(data_dir, task, pos_class):
  ''' get_label_dict() knowing exactly which flags to look for and 
  how to group them into classes. 
  task is the name of what we're learning to detect,
  pos_class is a list of the actual flag names to look for. '''
  d = {'Default': [], task: []}
  print 'generating specific dict of class:files from %s...'%(data_dir)
  for filename in os.listdir(data_dir):
    if not filename.endswith('.dat'): continue
    with open(ojoin(data_dir, filename)) as f:
      content = [line.strip() for line in f.readlines()]
      if any([label==line for (label,line)
              in itertools.product(pos_class,content)]):
        d[task].append(filename.split('.')[0]+'.jpg')
      else:
        d['Default'].append(filename.split('.')[0]+'.jpg')
  return d
Example 37
def get_label_dict_knowing(data_dir, task, pos_class):
    ''' get_label_dict() knowing exactly which flags to look for and 
    how to group them into classes. 
    task is the name of what we're learning to detect,
    pos_class is a list of the actual flag names to look for. '''
    d = {'Default': [], task: []}
    print 'generating specific dict of class:files from %s...' % (data_dir)
    for filename in os.listdir(data_dir):
        if not filename.endswith('.dat'): continue
        with open(ojoin(data_dir, filename)) as f:
            content = [line.strip() for line in f.readlines()]
            if any([
                    label == line
                    for (label, line) in itertools.product(pos_class, content)
            ]):
                d[task].append(filename.split('.')[0] + '.jpg')
            else:
                d['Default'].append(filename.split('.')[0] + '.jpg')
    return d
Example 38
class Completion(BaseHandler):

    url = ojoin(Task.url, "completions", "({})".format(DATE_REGEX))

    @web.authenticated
    @rollback_on_failure
    def post(self, task_id, completed_on):
        completion_date = parsedate(completed_on)
        task = self.session.query(
            orm.Task).filter(orm.Task.task_id == task_id).one()
        if task.last_completed is None or completion_date > task.last_completed:
            task.complete(self.current_user, parsedate(completed_on))
            self.session.commit()
        else:
            self.current_user.notify(
                'error', "You already completed '{}' on {}".format(
                    task.name, date_str(completion_date)), task.task_id)
            self.session.commit()
        self.redirect(Main)
Example 39
class Invite(BaseHandler):
    '''Handles an invitation to become friends'''
    url = ojoin(Main.url, "invite")

    @web.authenticated
    @rollback_on_failure
    def post(self):
        email = self.get_argument('email', None)
        if email is None:
            self.redirect(Friends)
            return
        potential_friend = self.session.query(
            orm.User).filter_by(email=email).one()
        if potential_friend == self.current_user:
            self.current_user.notify(
                'error', 'Forever Alone: you tried to befriend yourself')
        else:
            potential_friend.befriend(self.current_user)
        self.session.commit()
        self.redirect(Friends)
Example 40
class EditTask(BaseHandler):
    r'''Handles editing a task'''

    url = ojoin(Task.url, "edit")

    @web.authenticated
    @rollback_on_failure
    def get(self, task_id):
        '''Shows the form for editing a particular task'''
        try:
            task = next(task for task in self.current_user.tasks
                        if str(task.task_id) == task_id)
        except Exception as e:
            self.set_status(404)
        else:
            try:
                self.render('edittask.html', task=task)
            except Exception as e:
                print(str(e))

    @web.authenticated
    @rollback_on_failure
    def post(self, task_id):
        '''Actually updates the task with the edits'''
        try:
            task = self.session.query(
                orm.Task).filter(orm.Task.task_id == task_id).one()
        except Exception as e:
            self.set_status(404)
            return
        task.name = self.get_argument('taskname', task.name)
        task.length = timedelta(
            days=int(self.get_argument('length', task.length)))
        task.allow_early = self.get_argument(
            'allowearly', 'on' if task.allow_early else 'off') == 'on'
        task.points = int(self.get_argument('points', task.points))
        task.tags = self.get_argument('tags', ', '.join(task.tags)).split(',')
        task.notes = self.get_argument('notes', task.notes)
        self.current_user.notify(
            'message', 'The task {.name} has been updated'.format(task))
        self.session.commit()
        self.redirect(Tasks)
Example 41
def compare(cli_out, py_out):
    print('presents')
    cli_present = read_present(ojoin(cli_out, 'PRESENTS.txt'))
    py_present = read_present(ojoin(py_out, 'PRESENTS.txt'))
    compare_dicts(cli_present, py_present)

    print('acceptable')
    cli_accept = read_accept(ojoin(cli_out, 'ACCEPTABLE.txt'))
    py_accept = read_accept(ojoin(py_out, 'ACCEPTABLE.txt'))
    compare_dicts(cli_accept, py_accept)

    print('positiveReview')
    cli_rev = read_review(ojoin(cli_out, 'POSITIVEREVIEW.txt'))
    py_rev = read_review(ojoin(py_out, 'POSITIVEREVIEW.txt'))
    compare_dicts(cli_rev, py_rev)
Example 42
class Notification(BaseHandler):

    url = ojoin(Notifications.url, "({})".format(UUID_REGEX))

    @web.authenticated
    @rollback_on_failure
    def post(self, notification_id):
        note = self.session.query(orm.Notification)\
            .filter_by(notification_id=notification_id).one()
        if note not in self.current_user.notifications:
            pass  # will just redirect
        elif self.get_argument('delete', None) == 'true':
            self.current_user.notifications.remove(note)
            self.session.commit()
        elif note.noti_type == 'befriend' and self.get_argument(
                'accept', 'false') == 'true':
            friend = self.session.query(
                orm.User).filter_by(email=note.sender).one()
            self.current_user._followers.append(friend)
            friend.notify(
                'message', '{.name} has accepted your friend request'.format(
                    self.current_user))
            self.current_user.notifications.remove(note)
            self.session.commit()
        elif note.noti_type == 'share' and self.get_argument(
                'accept', 'false') == 'true':
            task = self.session.query(
                orm.Task).filter_by(task_id=note.task_id).one()
            sender = self.session.query(
                orm.User).filter_by(email=note.sender).one()
            self.current_user.tasks.append(task)
            self.current_user.notify(
                'message', "You have accepted the task '{.name}'".format(task))
            sender.notify(
                'message', "{.name} has accepted the task '{.name}'".format(
                    self.current_user, task))
            self.current_user.notifications.remove(note)
            self.session.commit()
        self.redirect(Notifications)
Example 43
class Google(BaseHandler, auth.GoogleMixin):
    url = ojoin(Main.url, "auth", "google")

    @web.asynchronous
    def get(self):
        if self.get_argument("openid.mode", None):
            self.get_authenticated_user(self.async_callback(self._on_auth))
            return
        self.authenticate_redirect()

    def _on_auth(self, user):
        if not user:
            raise web.HTTPError(500, "Google auth failed")
        self.set_secure_cookie('user', user['email'])
        if not self.session.query(
                orm.User).filter_by(email=user['email']).first():
            usr = orm.User(email=user['email'],
                           name=user.get('name'),
                           firstname=user.get('first_name'),
                           lastname=user.get('last_name'))
            self.session.add(usr)
            self.session.commit()
        self.redirect(Main)
def merge_classes(to_dir, labels):
  ''' once move_to_dirs is done, may wish to merge classes. '''
  more = 'Y'
  while more == 'Y':
    print '%s' % (', '.join(map(str,labels)))
    if raw_input('Merge (more) classes? (Y/N) ') == 'Y':
      merge = [-1]
      while not all([idx in range(len(labels)) for idx in merge]):
        for elem in enumerate(labels): print elem
        merge = [int(elem) for elem in raw_input("Name two class numbers from above, separated by ' ': ").split()]

      print 'moving files...'
      for fname in os.listdir(ojoin(to_dir,labels[merge[1]])):
        shutil.move(ojoin(to_dir,labels[merge[1]],fname),
                      ojoin(to_dir,labels[merge[0]]))
      new_label = raw_input('name of merged class? ')
      os.rmdir(ojoin(to_dir,labels[merge[1]]))
      os.rename(ojoin(to_dir,labels[merge[0]]), 
                ojoin(to_dir,new_label))
      labels = update_labels(labels, merge, new_label)

    else: more = 'N'
  return labels
Example 45
def parse_log(path, keyvars):
    # (header reconstructed from the call in __main__ below)
    results = {}
    for line in open(path):
        colon_index = line.find(":")
        if colon_index != -1:
            key = line[:colon_index].strip()
            if key in keyvars:
                value = float(line[colon_index + 1:])
                if key in results:
                    results[key].append(value)
                else:
                    results[key] = [value]
                    
    return [results[key] for key in keyvars]


def min_at(values):
    return min( (v, i) for i, v in enumerate(values) )


if __name__ == '__main__':
    logfile = sys.argv[1]
    if not logfile.endswith('.txt'):
        logfile = ojoin(logfile, 'log.txt')

    curves = parse_log(logfile,
                       ['valid_approx_cost_class_corr', 
                        'valid_approx_error_rate'])
    valid_cost, valid_i = min_at(curves[0])
    print 'epochs:\t\t', len(curves[0])
    print 'min cost:\t', valid_cost
    print 'ER at min cost:\t', curves[1][valid_i]
    print 'min ER:\t\t', min(curves[1])
Example 46
    raise Exception("Need to specify --task flag")
  task = optDict["task"]
  data_info = "/data/ad6813/caffe/data/" + task
  
  if not "box" in optDict:
    raise Exception("Need to specify --box flag\nRed, Blue, RedBlue")
  data_dir = "/data/ad6813/pipe-data/" + optDict["box"].capitalize() + "box/raw_data/dump"
  
  if not "learn" in optDict:
    raise Exception("Need to specify --learn flag\nlabNum1-labNum2-...-labNumk")
  pos_class = flag_lookup(optDict["learn"])

  target_bad_min = None
  if "target-bad-min" in optDict:
    target_bad_min = float(optDict["target-bad-min"])
    
  # baseDir = os.path.abspath("../task/" + task) + "/"

  # write to read file how to interpret values as classes and might
  # as well save entire command
  if not os.path.isdir(data_info): os.mkdir(data_info)
  with open(ojoin(data_info,'read.txt'), 'w') as read_file:
    read_file.write(" ".join(sys.argv)+'\n')

  # run the main pipeline
  main(data_dir, data_info, task, pos_class, target_bad_min)

  # still need to automate this
  # p = subprocess.Popen("./setup_rest.sh " + task + " " + str(num_output), shell=True)
  # p.wait()
Example 47
    def to_table(
        self,
        rootname="myconfig",
        destination=None,
        template=None,
        entry=None,
        createfolders=False,
        sep=",",
    ):
        # pylint: disable=too-many-arguments
        # pylint: disable=too-many-branches
        """Export a particular entry in config as text table files;
        one with true values and one with templated variables.

        Args:
            rootname: Root file name without extension. An extension
                .txt will be added for destination, and .txt.tmpl
                for template output.
            destination: The directory path for the destination
                file. If None, then no output will be given
            template: The directory path for the templated
                file. If None, then no templated output will be given.
            entry (str): Using one of the specified key/entry sections in the
                master config that holds a table, e.g. 'global.FWL'.
            createfolders (bool): If True then folders will be created if they
                do not exist (default is False).
            sep (str): Table separator, e.g. ' ', default is ','

        Raises:
            ValueError: If both destination and template are None;
                if a folder does not exist in advance while
                createfolders=False; or if entry is not specified.

        Example:

            >>> config.to_table('fwl', destination='../',
                                entry='global.FWL')
        """

        if not destination and not template:
            raise ValueError("Both destination and template are None. "
                             "At least one of them has to be set!")

        if entry is None:
            raise ValueError("The entry is None; need a value, "
                             'e.g. "global.FWL"')

        if createfolders:
            self._force_create_folders([destination, template])
        else:
            self._check_folders([destination, template])

        keys = entry.split(".")

        if len(keys) == 1:
            cfg = self.config[keys[0]]
        elif len(keys) == 2:
            cfg = self.config[keys[0]][keys[1]]
        elif len(keys) == 3:
            cfg = self.config[keys[0]][keys[1]][keys[2]]
        elif len(keys) == 4:
            cfg = self.config[keys[0]][keys[1]][keys[2]][keys[3]]
        else:
            raise ValueError("Entry with more that 4 sublevels, not supported")

        if destination:
            with open(ojoin(destination, rootname + ".txt"), "w") as dest:
                for row in cfg:
                    for col in row:
                        stream = str(col)
                        stream = self._get_required_form(stream,
                                                         template=False)
                        # print('<{}>'.format(stream))
                        print(str(stream) + sep, file=dest, end="")
                    print("", file=dest)
        if template:
            with open(ojoin(template, rootname + ".txt.tmpl"), "w") as tmpl:
                for row in cfg:
                    for col in row:
                        stream = str(col)
                        stream = self._get_required_form(stream, template=True)
                        print(str(stream) + sep, file=tmpl, end="")
                    print("", file=tmpl)
def get_label_dict(data_dir):
  # (header reconstructed from the call in __main__ below)
  path = data_dir
  total_num_images = 0
  d = {'Perfect': []}
  print 'generating dict of label:files from %s...'%(data_dir)
  for filename in os.listdir(path):
    if not filename.endswith('.dat'): continue
    total_num_images += 1
    fullname = os.path.join(path, filename)
    with open(fullname) as f:
      content = [line.strip() for line in f.readlines()] 
      if content == []:
        d['Perfect'].append(filename.split('.')[0]+'.jpg')
      else:
        for label in content:
          if label not in d.keys(): d[label] = []
          d[label].append(filename.split('.')[0]+'.jpg')
  d['total_num_images'] = total_num_images
  json.dump(d, open('label_dict_'+str(date.today()),'w'))
  return d

if __name__ == '__main__':
  here = os.getcwd()
  data_dir = '/data2/ad6813/pipe-data/Bluebox'
  os.chdir('../scripts/data_preparation')
  d = get_label_dict(data_dir)
  os.chdir(here)
  for label in d.keys():
    if type(d[label]) == list:
      if not os.path.isdir(label): os.mkdir(label)
      length = min(20,len(d[label]))
      for f in d[label][:length]:
        shutil.copy(ojoin(data_dir,f), ojoin(label,f))
def test_imbalance_experiment():
  os.mkdir('temp')
  os.mkdir(ojoin('temp','class1'))
  os.mkdir(ojoin('temp','class2'))
def create_lookup_txtfiles(data_dir, to_dir=None):
  ''' data_dir: where raw data is
      to_dir: where to store .txt files. '''
  
  list_dir = os.listdir(data_dir) # names of all elements in dir
  lastLabelIsDefault = False
  img_labels = [] # image's labels to train on
  dump = []       # contain text to write to .txt files
  case_count = 0 # number of training cases
  tagless_count = 0 # num of images with none of the labels to learn
  badcase_count = 0 # num of images with multiple flags to train on

  if to_dir is not None:
    train_file = open(ojoin(to_dir,'train.txt'), 'w')
    val_file = open(ojoin(to_dir,'val.txt'), 'w')
    test_file = open(ojoin(to_dir,'test.txt'), 'w')
    read = open(ojoin(to_dir,'read.txt'), 'w')
  
  # get labels of classes to learn
  labels_read = get_all_pipe_labels(data_dir,save=False)['labels']
  lookup = {}
  for num,label in enumerate(labels_read):
    lookup[label] = num
  for elem in enumerate(labels_read): print elem
  labels_read = [labels_read[int(num)] for num in raw_input("Numbers of labels to learn, separated by ' ': ").split()]
  labels_write = labels_read[:]

  lookup, labels_write = merge_classes(lookup, labels_write)

  label_default = raw_input("Default label for all images not containing any of given labels? (name/N) ")
  if label_default != 'N':
    lastLabelIsDefault = True
    lookup[label_default] = len(labels_write)
    labels_write.append(label_default)
            
  print 'sorting images by class label...'
  for fname in list_dir:
    if not fname.endswith('.dat'): continue
    case_count += 1    
    fullname_dat = os.path.join(data_dir, fname)
    rootname = os.path.splitext(fname)[0]
    with open(fullname_dat) as f:
      content = [line.strip() for line in f.readlines()] 
      img_labels = [label for label in labels_read if label in content]

      # if last label is a normal label, images with no labels will
      # not be batched
      if not img_labels: 
        if lastLabelIsDefault:
          dump.append((fname.split('.')[0]+'.jpg',lookup[label_default]))
        else: tagless_count += 1
      else:
        # if image has multiple flags, it will appear in each flag
        # subdir, each time with only one label. this is very bad for
        # training, so hopefully such cases are very rare.'
        if len(img_labels)>1: 
          badcase_count += len(img_labels)-1
          case_count += len(img_labels)-1
        for label in img_labels:
          dump.append((fname.split('.')[0]+'.jpg',lookup[label]))

  print "dump has %i elements, looking like %s and %s"%(len(dump),dump[0], dump[300])
  # write dump to train/val/test files, randomised: hold out 20% of the
  # dataset, ~7% for validation and ~13% for test; rest for training
  non_train_dump_size = int(0.2*len(dump))
  relative_val_size = int(0.34*non_train_dump_size)
  non_train_dump = random.sample(dump, non_train_dump_size)
  val_dump = random.sample(non_train_dump, relative_val_size)
  test_dump = [elem for elem in non_train_dump
               if elem not in val_dump]
  train_dump = [elem for elem in dump if elem not in non_train_dump]
  random.shuffle(train_dump)

  if to_dir is not None:
    train_file.writelines(["%s %i\n" % (fname,num)
                           for (fname,num) in train_dump])
    val_file.writelines(["%s %i\n" % (fname,num)
                         for (fname,num) in val_dump])
    test_file.writelines(["%s %i\n" % (fname,num)
                         for (fname,num) in test_dump])

    # write to read file how to interpret values as classes
    read.writelines(["%i %s\n" % (lookup[label],label,)
                           for label in labels_write])
    train_file.close()
    val_file.close()
    test_file.close()
    read.close()


  print 'create_lookup_txtfiles complete. summary stats:'
  print 'badcase_freq: %0.2f' % (float(badcase_count) / case_count)
  print 'tagless_freq: %0.2f' % (float(tagless_count) / case_count)

  return train_dump, val_dump, test_dump
def remove_imgs(net_dir, remove_dic):
  ''' remove_dic knows which imgs to remove in each class subdir,
  and does so in net_dir. '''
  for c in remove_dic.keys():
    for fname in remove_dic[c]:
      os.remove(ojoin(net_dir, c, fname))
Example 53
import numpy as np
import matplotlib.pyplot as plt
#%matplotlib inline
import os, sys
from os.path import join as ojoin
from subprocess import call

# Make sure that caffe is on the python path:
caffe_root = '../'  # this file is expected to be in {caffe_root}/examples
imagenet_dir = ojoin(caffe_root,'examples/imagenet')
sys.path.insert(0, caffe_root + 'python')

import caffe

# Set the right path to your model definition file, pretrained model weights,
# and the image you would like to classify.
MODEL_FILE = ojoin(imagenet_dir,'imagenet_deploy.prototxt')
PRETRAINED = ojoin(imagenet_dir, 'caffe_reference_imagenet_model')
MEAN_FILE = ojoin(caffe_root,'python/caffe/imagenet/ilsvrc_2012_mean.npy')
IMAGE_FILE = ojoin(caffe_root,'examples/images/cat.jpg')


# get PRETRAINED
# if not os.path.isfile(PRETRAINED):
#   call(['./get_caffe_reference_imagenet_model.sh'])
  
# load network
print os.getcwd()
net = caffe.Classifier(MODEL_FILE, PRETRAINED,
                       image_dims=(256, 256), input_scale=255,
                       mean_file=MEAN_FILE, channel_swap=(2,1,0))