Example #1
 def createtarball(self):
     mkdir_p(self.workdir)
     with KeepWhileOpenFile(self.tmptarball + ".tmp") as kwof:
         if not kwof:
             return "another process is already copying the tarball"
         if not os.path.exists(self.originaltarball):
             return "original tarball does not exist"
         if datetime.datetime.fromtimestamp(
                 os.path.getmtime(
                     self.originaltarball)) <= self.modifiedafter:
             return "original tarball is an older version than we want"
         mkdir_p(os.path.dirname(self.foreostarball))
         if self.patchkwargs:
             kwargs = self.patchkwargs
             for _ in "oldfilename", "newfilename", "sample":
                 assert _ not in kwargs, _
             with cdtemp():
                 kwargs["oldfilename"] = self.originaltarball
                 kwargs["newfilename"] = os.path.abspath(
                     os.path.basename(self.originaltarball))
                 #kwargs["sample"] = self  #???
                 patches.dopatch(**kwargs)
                 shutil.move(os.path.basename(self.originaltarball),
                             self.foreostarball)
         else:
             shutil.copy(self.originaltarball, self.foreostarball)
         return "gridpack is copied from " + self.originaltarball + " to this folder, to be copied to eos"
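All of these examples call a mkdir_p helper without defining it. For reference, a minimal sketch of the usual recipe (assuming the helper just mimics mkdir -p: create the directory and any missing parents, and tolerate a directory that already exists) could look like this:

import errno
import os

def mkdir_p(path):
    """Sketch: create path and all missing parents; ignore the error if the
    directory already exists, re-raise anything else."""
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST or not os.path.isdir(path):
            raise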
Example #2
  def findmatchefficiency(self):
    #figure out the filter efficiency
    if not self.hasfilter:
      self.matchefficiency = 1
      return "filter efficiency is set to 1 +/- 0"
    else:
      if not self.implementsfilter: raise ValueError("Can't find match efficiency for {.__name__} which doesn't implement filtering!".format(type(self)))
      mkdir_p(self.workdir)
      jobsrunning = False
      eventsprocessed = eventsaccepted = 0
      with cd(self.workdir):
        for i in range(100):
          mkdir_p(str(i))
          with cd(str(i)), KeepWhileOpenFile("runningfilterjob.tmp", message=LSB_JOBID(), deleteifjobdied=True) as kwof:
            if not kwof:
              jobsrunning = True
              continue
            if not os.path.exists(self.filterresultsfile):
              if not LSB_JOBID():
                submitLSF(self.filterefficiencyqueue)
                jobsrunning = True
                continue
              if not queuematches(self.filterefficiencyqueue):
                jobsrunning = True
                continue
              self.dofilterjob(i)
            processed, accepted = self.getfilterresults(i)
            eventsprocessed += processed
            eventsaccepted += accepted

        if jobsrunning: return "some filter efficiency jobs are still running"
        self.matchefficiency = uncertainties.ufloat(1.0*eventsaccepted / eventsprocessed, (1.0*eventsaccepted * (eventsprocessed-eventsaccepted) / eventsprocessed**3) ** .5)
        #shutil.rmtree(self.workdir)
        return "match efficiency is measured to be {}".format(self.matchefficiency)
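Examples #1, #2 and #3 (and several below) also use a KeepWhileOpenFile context manager that is not shown. From the call sites it appears to create a lock file on entry, evaluate as falsy if the file already exists (so another job owns the lock), and remove the file on exit. A hypothetical sketch under those assumptions (the deleteifjobdied behaviour is omitted):

import os

class KeepWhileOpenFile(object):
    """Hypothetical lock-file helper: atomically create a marker file on
    entry, behave as falsy if it already exists, remove it again on exit."""

    def __init__(self, filename, message=None, deleteifjobdied=False):
        self.filename = filename
        self.message = message
        self.fd = None

    def __enter__(self):
        try:
            # O_EXCL makes the creation atomic, so two processes cannot both win
            self.fd = os.open(self.filename,
                              os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        except OSError:
            return None  # lock already held -> the "if not kwof" branch
        if self.message is not None:
            os.write(self.fd, str(self.message).encode())
        return self

    def __exit__(self, *exc):
        if self.fd is not None:
            os.close(self.fd)
            os.remove(self.filename)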
Example #3
  def patchtarball(self):
    if os.path.exists(self.cvmfstarball) or os.path.exists(self.eostarball) or os.path.exists(self.foreostarball): return

    if not self.needspatch: assert False
    mkdir_p(self.workdir)
    with KeepWhileOpenFile(self.tmptarball+".tmp", message=LSB_JOBID()) as kwof:
      if not kwof:
        return "job to patch the tarball is already running"

      kwargs = self.needspatch
      if isinstance(kwargs, int):
        kwargs = self.patchkwargs
        kwargs["oldtarballversion"] = self.needspatch
      if "oldfilename" in kwargs or "newfilename" in kwargs or "sample" in kwargs: assert False, kwargs
      kwargs["oldfilename"] = self.cvmfstarball_anyversion(version=kwargs.pop("oldtarballversion"))
      kwargs["newfilename"] = self.foreostarball
      mkdir_p(os.path.dirname(self.foreostarball))

      patches.dopatch(**kwargs)

      if not os.path.exists(self.foreostarball): raise RuntimeError("Patching failed, gridpack doesn't exist")
      if self.timeperevent is not None:
        del self.timeperevent
      self.needspatch = False

      return "tarball is patched and the new version is in this directory to be copied to eos"
Example #4
def download(date,simu_starttime,folder):

    output_directory=folder+"/"+date+"_"+simu_starttime

    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    origin_folder =output_directory+"/grib2"
    util.mkdir_p(origin_folder)

    year=date[0:4]

    number=0

    while (number <=384):  
        number_str=("%03d" %(number))
        url="http://rda.ucar.edu/data/ds084.1/"+year+"/"+date+"/gfs.0p25."+date+simu_starttime+".f"+number_str+".grib2"
        global filename
        filename="starting to download file "+date+" "+number_str
   #     print("starting to download "+url+".....")
        wget_command = 'wget -N --no-check-certificate --load-cookies auth.rda_ucar_edu '+url+" -P "+origin_folder
        #os.system(wget_command)
        cmd = wget_command.split()
        subprocess.call(cmd)
#        execute(wget_command)

        if (number<240):
            number=number+3
        else: 
            number=number+12
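A hypothetical invocation of this download helper, with the argument formats implied by the URL it builds (an 8-digit date and a 2-digit forecast start hour; the output folder is an assumption):

download("20190101", "00", "/tmp/gfs_data")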
Example #5
 def store_remote(self, dest_file, content, manipulate):
     usable_path = self.in_remote_root(dest_file)
     if not exists(usable_path)\
             or self.all_object.get_remote(dest_file) != content:
         mkdir_p(dirname(usable_path))
         write(usable_path, content)
     # XXX: I suspect that ^this and immediate power off of the target
     # system led to truncation of some affected files to length 0!
     manipulate.execute(usable_path)
     self.append_to_file_list(dest_file)
Example #6
def configure_logger(logging_config_file, log_file):
    try:
        if not os.path.exists(os.path.dirname(os.path.abspath(log_file))):
            mkdir_p(os.path.dirname(os.path.abspath(log_file)))
        logging.config.fileConfig(logging_config_file,
                                  disable_existing_loggers=False,
                                  defaults={'logfilename': log_file})
        logging.getLogger("urllib3").setLevel(logging.ERROR)
        logging.getLogger("requests_kerberos").setLevel(logging.ERROR)
    except Exception as e:
        print("Error in configuring logger %s" % e)
        exit(1)
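The defaults={'logfilename': log_file} argument above only has an effect if logging_config_file interpolates that key. A minimal, purely hypothetical config in the logging.config.fileConfig format showing the %(logfilename)s substitution:

[loggers]
keys=root

[handlers]
keys=file

[formatters]
keys=plain

[logger_root]
level=INFO
handlers=file

[handler_file]
class=FileHandler
formatter=plain
args=('%(logfilename)s', 'a')

[formatter_plain]
format=%(asctime)s %(levelname)s %(name)s: %(message)s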
Example #7
 def checkandfixtarball(self):
     mkdir_p(self.workdir)
     with KeepWhileOpenFile(os.path.join(self.workdir,
                                         self.prepid + '.tmp'),
                            message=LSB_JOBID(),
                            deleteifjobdied=True) as kwof:
         if not kwof: return " check in progress"
         if not LSB_JOBID():
             self.submitLSF()
             return "Check if the tarball needs fixing"
         with cdtemp():
             subprocess.call(['cp', self.cvmfstarball, '.'])
             subprocess.check_call(['tar', 'xzvf', self.cvmfstarball])
             subprocess.call(['cp', 'readInput.DAT', 'readInput.DAT_bak'])
             os.system('chmod 755 runcmsgrid.sh')
             try:
                 output = subprocess.check_output(
                     ['bash', 'runcmsgrid.sh', '1', '31313', '12'],
                     stderr=subprocess.STDOUT)
             except subprocess.CalledProcessError as e:
                 output = e.output
             for line in output.split('\n'):
                 if not 'Reading in vegas grid from' in line: continue
                 else:
                     line = line.split()[-2]
                     internalgridname = line.split('CMS_')[1]
             internalgridname = str(internalgridname)
             print "internal tarball name: " + internalgridname
             if self.datasetname + '_grid' == internalgridname:
                 with open(os.path.join(self.workdir, 'INTACT'),
                           'w') as fout:
                     fout.write(LSB_JOBID())
                 return str(self.identifiers) + "'s gridpack is intact"
             else:
                 os.system('cp ' + self.datasetname + '_grid ' +
                           internalgridname)
                 os.system('mv readInput.DAT_bak readInput.DAT')
                 os.system('rm -r *tgz CMSSW*')
                 curdirpath = subprocess.check_output(['pwd'])
                 os.system('tar cvaf ' + self.tmptarball + ' ./*')
                 if os.path.exists(self.tmptarball):
                     with open(os.path.join(self.workdir, 'FIXED'),
                               'w') as fout:
                         fout.write(LSB_JOBID())
Example #8
def launch_scrape(directory, method, params, timestamp, umap):
  cur_date = None
  cur_file = None

  user_string = "|".join(umap.keys())

  # CLOSURE
  def replace_uid(match):
    uid = match.group("id")
    if uid in umap:
      return "@%s"%umap[uid]
    return "<@%s>"%uid

  for message in api.message_generator(method, params, timestamp):
    # the format will be line separated json objects for each message because
    # large json objects are the worst

    # update current file
    datestring_short, datestring_long = get_date_string(message["ts"])
    if cur_file is None or cur_date != datestring_long:
      if not cur_file is None:
        cur_file.close()
      cur_date = datestring_long
      utilities.mkdir_p("%s/%s"%(directory, datestring_short))
      cur_file = open("%s/%s/%s.json"%(directory, datestring_short, datestring_long), "a")

    # clean up message object user ids
    if config.replace_user_ids:
      # replace who's talking
      if "user" in message and message["user"] in umap:
        message["user"] = umap[message["user"]]

      # replace mentions (this is a bit slow!)
      if "text" in message:
        message["text"]  = re.sub("<@(?P<id>" + user_string + ")>", replace_uid, message["text"])

    # save! potentially devastating assumption: chronological
    cur_file.write("%s\n"%json.dumps(message))

  if cur_file:
    cur_file.close()
Example #9
  def getsizeandtime(self):
    mkdir_p(self.workdir)
    with KeepWhileOpenFile(os.path.join(self.workdir, self.prepid+".tmp"), message=LSB_JOBID(), deleteifjobdied=True) as kwof:
      if not kwof: return "job to get the size and time is already running"
      if not LSB_JOBID(): return "need to get time and size per event, submitting to LSF" if submitLSF(self.timepereventqueue) else "need to get time and size per event, job is pending on LSF"
      if not queuematches(self.timepereventqueue): return "need to get time and size per event, but on the wrong queue"
      with cdtemp():
        wget(os.path.join("https://cms-pdmv.cern.ch/mcm/public/restapi/requests/get_test/", self.prepid, str(self.neventsfortest) if self.neventsfortest else "").rstrip("/"), output=self.prepid)
        with open(self.prepid) as f:
          testjob = f.read()
        with open(self.prepid, "w") as newf:
          newf.write(eval(testjob))
        os.chmod(self.prepid, os.stat(self.prepid).st_mode | stat.S_IEXEC)
        subprocess.check_call(["./"+self.prepid], stderr=subprocess.STDOUT)
        with open(self.prepid+"_rt.xml") as f:
          nevents = totalsize = None
          for line in f:
            line = line.strip()
            match = re.match('<TotalEvents>([0-9]*)</TotalEvents>', line)
            if match: nevents = int(match.group(1))
            match = re.match('<Metric Name="Timing-tstoragefile-write-totalMegabytes" Value="([0-9.]*)"/>', line)
            if match: totalsize = float(match.group(1))
            if self.year >= 2017:
              match = re.match('<Metric Name="EventThroughput" Value="([0-9.eE+-]*)"/>', line)
              if match: self.timeperevent = 1/float(match.group(1))
            else:
              match = re.match('<Metric Name="AvgEventTime" Value="([0-9.eE+-]*)"/>', line)
              if match: self.timeperevent = float(match.group(1))
          if nevents is not None is not totalsize:
            self.sizeperevent = totalsize * 1024 / nevents

    shutil.rmtree(self.workdir)

    if not (self.sizeperevent and self.timeperevent):
      return "failed to get the size and time"
    if LSB_JOBID(): return "size and time per event are found to be {} and {}, run locally to send to McM".format(self.sizeperevent, self.timeperevent)
    self.updaterequest()
    return "size and time per event are found to be {} and {}, sent it to McM".format(self.sizeperevent, self.timeperevent)
Example #10
    def save_bookmarks(self):
        """Save bookmarks to file structure"""

        data = self.load_browser_bookmarks()
        self._process_bookmarks(data)

        for folder in self.folders.keys():
            # strip forward slashes from folder names
            folder = folder.replace('/', '')
            folder = folder.replace('\\', '')

            # create Chrome directories
            path = self.output_location + '/' + folder + '/'
            mkdir_p(path)

            for item in self.folders[folder]:

                # create new directory for web page
                web_page_path = path + '/' + item['name']
                mkdir_p(web_page_path)

                # create new directory for web page resources directory
                web_page_resources = web_page_path + '/resources'
                mkdir_p(web_page_resources)

                # strip slashes from web page names
                name = item['name']
                name = name.replace('/', '')
                name = name.replace('\\', '')

                # Skip URLs with PDF extension
                if '.pdf' in name[-4:]:
                    continue

                # save files
                try:
                    self.logger.info('Getting URL {}'.format(item['url']))
                    web_object = WebScraper(item['url'])

                    # get web page css
                    css = web_object.get_css()

                    # get web page content
                    content = web_object.get_web_page()

                    # save main web page
                    with open(web_page_path + '/bookmark.html', 'wb') as f:
                        f.write(content)

                    # save css files
                    if css is not None:
                        with open(web_page_resources + '/styles.css', 'wb') as f:
                            f.write(css)

                    self.logger.info('Successfully saved URL {}'.format(item['url']))
                except Exception as e:
                    self.logger.error('Web page not saved - {} - {}'.format(item['url'], e))
                    pass
Example #11
def setup(filename=CONFIGNAME):
     """Prepare a default GromacsWrapper global environment.

     1) Create the global config file.
     2) Create the directories in which the user can store template and config files.

     This function can be run repeatedly without harm.
     """
     # setup() must be separate and NOT run automatically when config
     # is loaded so that easy_install installations work
     # (otherwise we get a sandbox violation)
     # populate cfg with defaults (or existing data)
     get_configuration()
     if not os.path.exists(filename):
          with open(filename, 'w') as configfile:
               cfg.write(configfile)  # write the default file so that user can edit
               msg = "NOTE: GromacsWrapper created the configuration file \n\t%r\n" \
                     "      for you. Edit the file to customize the package." % filename
               print msg

     # directories
     for d in config_directories:
          utilities.mkdir_p(d)
Example #12
def setup(filename=CONFIGNAME):
    """Prepare a default BornProfiler global environment.

    1) Create the global config file.
    2) Create the directories in which the user can store template and config files.

    This function can be run repeatedly without harm.
    """
    # setup() must be separate and NOT run automatically when config
    # is loaded so that easy_install installations work
    # (otherwise we get a sandbox violation)
    # Note that cfg is populated with defaults when this module is imported.
    if not os.path.exists(filename):
        with open(filename, 'w') as configfile:
            cfg.write(
                configfile)  # write the default file so that user can edit
        msg = "NOTE: BornProfiler created the configuration file \n\t{0}\n".format(filename) + \
              "      for you. Edit the file to customize the package."
        print msg

    # directories
    utilities.mkdir_p(configdir)
    utilities.mkdir_p(qscriptdir)
    utilities.mkdir_p(templatesdir)
Example #13
    def save_bookmarks(self):
        """Save bookmarks to file structure"""

        self.logger.info("Starting bookmark collection")

        data = self.load_browser_bookmarks()
        self._process_bookmarks(data)

        for folder in self.folders.keys():
            # strip forward slashes from folder names
            folder = folder.replace("/", "")
            folder = folder.replace("\\", "")

            # create Chrome directories
            path = self.output_location + "/" + folder + "/"
            mkdir_p(path)

            for item in self.folders[folder]:

                # create new directory for web page
                web_page_path = path + "/" + item["name"]
                mkdir_p(web_page_path)

                # create new directory for web page resources directory
                web_page_resources = web_page_path + "/resources"
                mkdir_p(web_page_resources)

                # strip slashes from web page names
                name = item["name"]
                name = name.replace("/", "")
                name = name.replace("\\", "")

                # Skip URLs with PDF extension
                if ".pdf" in name[-4:]:
                    continue

                # save files
                try:
                    self.logger.info("Getting URL {}".format(item["url"]))
                    web_object = WebScraper(item["url"])

                    # get web page css
                    css = web_object.get_css()

                    # get web page content
                    content = web_object.get_web_page()

                    # save main web page
                    with open(web_page_path + "/bookmark.html", "wb") as f:
                        f.write(content)

                    # save css files
                    if css is not None:
                        with open(web_page_resources + "/styles.css", "wb") as f:
                            f.write(css)

                    self.logger.info("Successfully saved URL {}".format(item["url"]))
                except Exception as e:
                    self.logger.error("Web page not saved - {} - {}".format(item["url"], e))
                    pass

        self.logger.info("Completed bookmark collection")
Example #14
    def save_bookmarks(self):
        """Save bookmarks to file structure"""

        self.logger.info('Starting bookmark collection')

        data = self.load_browser_bookmarks()
        self._process_bookmarks(data)

        for folder in self.folders.keys():
            # strip forward slashes from folder names
            folder = folder.replace('/', '')
            folder = folder.replace('\\', '')

            # create Chrome directories
            path = self.output_location + '/' + folder + '/'
            mkdir_p(path)

            for item in self.folders[folder]:

                # create new directory for web page
                web_page_path = path + '/' + item['name']
                mkdir_p(web_page_path)

                # create new directory for web page resources directory
                web_page_resources = web_page_path + '/resources'
                mkdir_p(web_page_resources)

                # strip slashes from web page names
                name = item['name']
                name = name.replace('/', '')
                name = name.replace('\\', '')

                # Skip URLs with PDF extension
                if '.pdf' in name[-4:]:
                    continue

                # save files
                try:
                    self.logger.info('Getting URL {}'.format(item['url']))
                    web_object = WebScraper(item['url'])

                    # get web page css
                    css = web_object.get_css()

                    # get web page content
                    content = web_object.get_web_page()

                    # save main web page
                    with open(web_page_path + '/bookmark.html', 'wb') as f:
                        f.write(content)

                    # save css files
                    if css is not None:
                        with open(web_page_resources + '/styles.css',
                                  'wb') as f:
                            f.write(css)

                    self.logger.info('Successfully saved URL {}'.format(
                        item['url']))
                except Exception as e:
                    self.logger.error('Web page not saved - {} - {}'.format(
                        item['url'], e))
                    pass

        self.logger.info('Completed bookmark collection')
Example #15
def main(args):
    # reproducibility
    if args.seed is not None:
        torch.manual_seed(
            args.seed)  # don't think this works with SparseMNIST right now
        np.random.seed(args.seed)
    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)
    if args.checkpoint_filename is None:
        checkpoint_file = args.checkpoint + str(datetime.now())[:-10]
    else:
        checkpoint_file = args.checkpoint + args.checkpoint_filename

    # cuda
    args.use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if args.use_cuda else "cpu")

    # eval?
    args.evaluate = args.val_batches > 0

    # prep sparse mnist
    if not args.evaluate:
        train_loader, _, test_loader = prepare_data(args)
    else:
        train_loader, val_loader, test_loader = prepare_data(args)

    # machinery
    model = Judge().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # setup validation metrics we want to track for tracking best model over training run
    best_val_loss = float('inf')
    best_val_acc = 0

    print('\n================== TRAINING ==================')
    model.train()  # set model to training mode

    # set up training metrics we want to track
    correct = 0
    train_num = args.batches * args.batch_size

    # timer
    time0 = time.time()

    for ix, (
            sparse, img,
            label) in enumerate(train_loader):  # iterate over training batches
        sparse, label = sparse.to(device), label.to(
            device)  # get data, send to gpu if needed
        optimizer.zero_grad(
        )  # clear parameter gradients from previous training update
        logits = model(sparse)  # forward pass
        loss = F.cross_entropy(logits, label)  # calculate network loss
        loss.backward()  # backward pass
        optimizer.step(
        )  # take an optimization step to update model's parameters

        pred = logits.max(1, keepdim=True)[1]  # get the index of the max logit
        correct += pred.eq(
            label.view_as(pred)).sum().item()  # add to running total of hits

        if ix % args.log_interval == 0:  # maybe log current metrics to terminal
            print('Train: [{}/{} ({:.0f}%)]\tLoss: {:.6f}\t\
                Accuracy: {:.2f}%\tTime: {:0f} min, {:.2f} s'.format(
                (ix + 1) * len(sparse), train_num,
                100. * ix / len(train_loader), loss.item(),
                100. * correct / ((ix + 1) * len(sparse)),
                (time.time() - time0) // 60, (time.time() - time0) % 60))

    print(
        'Train Accuracy: {}/{} ({:.2f}%)\tTrain Time: {:0f} minutes, {:2f} seconds\n'
        .format(correct, train_num, 100. * correct / train_num,
                (time.time() - time0) // 60, (time.time() - time0) % 60))

    if args.evaluate:
        print('\n================== VALIDATION ==================')
        model.eval()

        # set up validation metrics we want to track
        val_loss = 0.
        val_correct = 0
        val_num = args.eval_batch_size * args.val_batches

        # disable autograd here (replaces volatile flag from v0.3.1 and earlier)
        with torch.no_grad():
            for sparse, img, label in val_loader:
                sparse, label = sparse.to(device), label.to(device)
                logits = model(sparse)

                val_loss += F.cross_entropy(logits, label,
                                            size_average=False).item()

                pred = logits.max(1, keepdim=True)[1]
                val_correct += pred.eq(label.view_as(pred)).sum().item()

        # update current evaluation metrics
        val_loss /= val_num
        val_acc = 100. * val_correct / val_num
        print(
            '\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'
            .format(val_loss, val_correct, val_num, val_acc))

        is_best = val_acc > best_val_acc
        if is_best:
            best_val_acc = val_acc
            best_val_loss = val_loss  # note this is val_loss of best model w.r.t. accuracy,
            # not the best val_loss throughout training

        # create checkpoint dictionary and save it;
        # if is_best, copy the file over to the file containing best model for this run
        state = {
            'state_dict': model.state_dict(),
            'optimizer_state': optimizer.state_dict(),
            'val_loss': val_loss,
            'val_acc': val_acc,
        }
        save_checkpoint(state, is_best, checkpoint_file)

    print('\n================== TESTING ==================')
    check = torch.load(checkpoint_file + '-best.pth.tar')
    model.load_state_dict(check['state_dict'])
    model.eval()

    test_loss = 0.
    test_correct = 0
    test_num = args.eval_batch_size * args.test_batches

    # disable autograd here (replaces volatile flag from v0.3.1 and earlier)
    with torch.no_grad():
        for sparse, img, label in test_loader:
            sparse, label = sparse.to(device), label.to(device)
            logits = model(sparse)
            test_loss += F.cross_entropy(logits, label,
                                         size_average=False).item()
            pred = logits.max(
                1, keepdim=True)[1]  # get the index of the max logit
            test_correct += pred.eq(label.view_as(pred)).sum().item()

    test_loss /= test_num
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, test_correct, test_num, 100. * test_correct / test_num))

    print('Final model stored at "{}".'.format(checkpoint_file +
                                               '-best.pth.tar'))
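This example (and Example #21 below) calls a save_checkpoint helper that is not shown. Since the code reloads checkpoint_file + '-best.pth.tar', it presumably follows the common PyTorch pattern of writing the latest state and copying it aside when it is the best so far; a sketch under that assumption (the '.pth.tar' name for the latest checkpoint is a guess):

import shutil
import torch

def save_checkpoint(state, is_best, checkpoint_file):
    # hypothetical sketch: write the latest checkpoint, then copy it to the
    # '-best.pth.tar' file that gets reloaded for testing whenever this
    # update improved on the tracked validation metric
    torch.save(state, checkpoint_file + '.pth.tar')
    if is_best:
        shutil.copyfile(checkpoint_file + '.pth.tar',
                        checkpoint_file + '-best.pth.tar')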
Example #16
  def createtarball(self):
    if os.path.exists(self.cvmfstarball) or os.path.exists(self.eostarball) or os.path.exists(self.foreostarball): return

    mkdir_p(self.workdir)
    with cd(self.workdir), KeepWhileOpenFile(self.tmptarball+".tmp", message=LSB_JOBID()) as kwof:
      if not kwof:
        with open(self.tmptarball+".tmp") as f:
          try:
            jobid = int(f.read().strip())
          except ValueError:
            return "try running again, probably you just got really bad timing"
        if jobended(str(jobid)):
          if self.makinggridpacksubmitsjob:
            os.remove(self.tmptarball+".tmp")
            return "job died at a very odd time, cleaned it up.  Try running again."
          for _ in os.listdir("."):            #--> delete everything in the folder, except the tarball if that exists
            if os.path.basename(_) != os.path.basename(self.tmptarball) and os.path.basename(_) != os.path.basename(self.tmptarball)+".tmp":
              try:
                os.remove(_)
              except OSError:
                shutil.rmtree(_)
          os.remove(os.path.basename(self.tmptarball)+".tmp") #remove that last
          return "gridpack job died, cleaned it up.  run makegridpacks.py again."
        else:
          return "job to make the tarball is already running"

      if self.gridpackjobsrunning:
        return "job to make the tarball is already running"

      if not os.path.exists(self.tmptarball):
        if not self.inthemiddleofmultistepgridpackcreation:
          for _ in os.listdir("."):
            if not _.endswith(".tmp"):
              try:
                os.remove(_)
              except OSError:
                shutil.rmtree(_)
        if not self.makinggridpacksubmitsjob and self.creategridpackqueue is not None:
          if not LSB_JOBID(): return "need to create the gridpack, submitting to LSF" if submitLSF(self.creategridpackqueue) else "need to create the gridpack, job is pending on LSF"
          if not queuematches(self.creategridpackqueue): return "need to create the gridpack, but on the wrong queue"
        for filename in self.makegridpackscriptstolink:
          os.symlink(filename, os.path.basename(filename))

        makinggridpacksubmitsjob = self.makinggridpacksubmitsjob

        #https://stackoverflow.com/a/17698359/5228524
        makegridpackstdout = ""
        pipe = subprocess.Popen(self.makegridpackcommand, stdout=subprocess.PIPE, bufsize=1)
        with pipe.stdout:
            for line in iter(pipe.stdout.readline, b''):
                print line,
                makegridpackstdout += line
        self.processmakegridpackstdout(makegridpackstdout)

        if makinggridpacksubmitsjob:
          return "submitted the gridpack creation job"
        if self.inthemiddleofmultistepgridpackcreation:
          return "ran one step of gridpack creation, run again to continue"

      mkdir_p(os.path.dirname(self.foreostarball))
      if self.patchkwargs:
        kwargs = self.patchkwargs
        for _ in "oldfilename", "newfilename", "sample": assert _ not in kwargs, _
        with cdtemp():
          kwargs["oldfilename"] = self.tmptarball
          kwargs["newfilename"] = os.path.abspath(os.path.basename(self.tmptarball))
          #kwargs["sample"] = self #???
          patches.dopatch(**kwargs)
          shutil.move(os.path.basename(self.tmptarball), self.tmptarball)

      if self.timeperevent is not None:
        del self.timeperevent
      shutil.move(self.tmptarball, self.foreostarball)
      shutil.rmtree(os.path.dirname(self.tmptarball))
      return "tarball is created and moved to this folder, to be copied to eos"
Example #17
 def do_file_list(self):
     mkdir_p(self.file_list_dir)
     write(self.file_list_file_name(),
             '\n'.join(sorted(self.file_list)) + '\n')
Example #18
def launch_scrape(directory, method, params, timestamp, umap):
  cur_date = None
  cur_file = None

  user_string = "|".join(umap.keys())

  def replace_uid(match):
    uid = match.group("id")
    if uid in umap:
      return "@%s"%umap[uid]
    return "<@%s>"%uid
  for message in api.message_generator(method, params, timestamp):
    datestring_short, datestring_long = get_date_string(message["ts"])
    if cur_file is None or cur_date != datestring_long:
      if not cur_file is None:
        cur_file.close()
      cur_date = datestring_long
      utilities.mkdir_p("%s/%s"%(directory, datestring_short))
      cur_file = open("%s/%s/%s.json"%(directory, datestring_short, datestring_long), "a")
    if config.replace_user_ids:
      if "user" in message and message["user"] in umap:
        message["user"] = umap[message["user"]]     
      if "text" in message:
        message["text"]  = re.sub("<@(?P<id>" + user_string + ")>", replace_uid, message["text"])
    cur_file.write("%s\n"%json.dumps(message))
  if cur_file:
    cur_file.close()


def scrape_channels(umap):
  print "Getting channels..."
  for channel in api.channel_generator():
    if not config.scrape_archived_channels and channel["is_archived"]:
Example #19
    def cardsurl(self):
        commit = self.genproductionscommit
        productioncardurl = os.path.join(
            "https://raw.githubusercontent.com/cms-sw/genproductions/", commit,
            self.productioncard.split("genproductions/")[-1])
        mdatascript = os.path.join(
            "https://raw.githubusercontent.com/cms-sw/genproductions/", commit,
            "bin/MCFM/ACmdataConfig.py")
        with cdtemp():
            with contextlib.closing(urllib.urlopen(productioncardurl)) as f:
                productiongitcard = f.read()

        with cdtemp():
            subprocess.check_output(["tar", "xvaf", self.cvmfstarball])
            if glob.glob("core.*"):
                raise ValueError(
                    "There is a core dump in the tarball\n{}".format(self))


#      for root, dirs, files in os.walk("."):
#	for ifile in files:
#	  try:
#	    os.stat(ifile)
#	  except Exception as e:
#	    if e.args == 'No such file or directory':   continue
#	    print ifile
#	    print e.message, e.args
#   	    raise ValueError("There is a broken symlink in the tarball\n{}".format(self))
            try:
                with open("readInput.DAT") as f:
                    productioncard = f.read()
            except IOError:
                raise ValueError(
                    "no readInput.DAT in the tarball\n{}".format(self))
            try:
                with open("src/User/mdata.f") as f:
                    mdatacard = f.read()
            except IOError:
                raise ValueError(
                    "no src/User/mdata.f in the tarball\n{}".format(self))

        if differentproductioncards(
                productioncard,
                productiongitcard) and not 'BKG' in self.identifiers:
            with cd(here):
                with open("productioncard", "w") as f:
                    f.write(productioncard)
                with open("productiongitcard", "w") as f:
                    f.write(productiongitcard)
            raise ValueError(
                "productioncard != productiongitcard\n{}\nSee ./productioncard and ./productiongitcard"
                .format(self))

        with contextlib.closing(
                urllib.urlopen(
                    os.path.join(
                        "https://raw.githubusercontent.com/cms-sw/genproductions/"
                        + commit + "/bin/MCFM/run_mcfm_AC.py"))) as f:
            infunction = False
            for line in f:
                if re.match(r"^\s*def .*", line): infunction = False
                if re.match(r"^\s*def downloadmcfm.*", line): infunction = True
                if not infunction: continue
                match = re.search(r"git checkout ([\w.]*)", line)
                if match: mcfmcommit = match.group(1)
        with cdtemp():
            mkdir_p("src/User")
            with cd("src/User"):
                wget(
                    os.path.join(
                        "https://raw.githubusercontent.com/usarica/MCFM-7.0_JHUGen",
                        mcfmcommit, "src/User/mdata.f"))
            wget(mdatascript)
            subprocess.check_call([
                "python",
                os.path.basename(mdatascript), "--coupling", self.coupling,
                "--mcfmdir", ".", "--bsisigbkg", self.signalbkgbsi
            ])
            with open("src/User/mdata.f") as f:
                mdatagitcard = f.read()

        if mdatacard != mdatagitcard and not 'BKG' in self.identifiers:
            with cd(here):
                with open("mdatacard", "w") as f:
                    f.write(mdatacard)
                with open("mdatagitcard", "w") as f:
                    f.write(mdatagitcard)
            raise ValueError(
                "mdatacard != mdatagitcard\n{}\nSee ./mdatacard and ./mdatagitcard"
                .format(self))

        result = (productioncardurl + "\n" + "# " + mdatascript + "\n" +
                  "#    --coupling " + self.coupling + " --bsisigbkg " +
                  self.signalbkgbsi)

        return result
Example #20
 def copy_to_remote_authorized_keys(self):
     remote_ssh_dir = join(self.mount_point, 'root/.ssh')
     remote_authorized_keys = join(remote_ssh_dir, 'authorized_keys')
     our_public_key = file_content(expanduser('~/.ssh/id_rsa.pub'))
     mkdir_p(remote_ssh_dir)
     ensure_contains(remote_authorized_keys, our_public_key)
Example #21
def main(args):
    # reproducibility
    # need to seed numpy/torch random number generators
    if args.seed is not None:
        torch.manual_seed(args.seed)
        np.random.seed(args.seed)
    # need directory with checkpoint files to recover previously trained models
    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)
    checkpoint_file = args.checkpoint + args.model + str(datetime.now())[:-10]

    # decide which device to use; assumes at most one GPU is available
    args.use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if args.use_cuda else "cpu")

    # decide if we're using a validation set;
    # if not, don't evaluate at end of epochs
    evaluate = args.train_split < 1.

    # prep data loaders
    if args.train_split == 1:
        train_loader, _, test_loader = prepare_data(args)
    else:
        train_loader, val_loader, test_loader = prepare_data(args)

    # build model
    if args.model == 'linear':
        model = Softmax().to(device)
    elif args.model == 'neuralnet':
        model = TwoLayer().to(device)
    else:
        model = ConvNet().to(device)

    # build optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 amsgrad=args.amsgrad)

    # setup validation metrics we want to track for tracking best model over training run
    best_val_loss = float('inf')
    best_val_acc = 0

    # set up tensorboard logger
    logger = LoggerX('test_mnist', 'mnist_data', 25)

    # loop over epochs
    for epoch in range(args.epochs):
        print('\n================== TRAINING ==================')
        model.train()  # set model to training mode
        # set up training metrics we want to track
        correct = 0
        train_num = len(train_loader.sampler)

        # metrics from logger
        model_metrics = CalculateMetrics(batch_size=args.batch_size,
                                         batches_per_epoch=len(train_loader))

        for ix, (img, label
                 ) in enumerate(train_loader):  # iterate over training batches
            img, label = img.to(device), label.to(
                device)  # get data, send to gpu if needed

            optimizer.zero_grad(
            )  # clear parameter gradients from previous training update
            output = model(img)  # forward pass
            loss = F.cross_entropy(output, label)  # calculate network loss
            loss.backward()  # backward pass
            optimizer.step(
            )  # take an optimization step to update model's parameters

            pred = output.max(
                1, keepdim=True)[1]  # get the index of the max logit
            # correct += pred.eq(label.view_as(pred)).sum().item() # add to running total of hits

            # convert this data to binary for the sake of testing the metrics functionality
            label[label < 5] = 0
            label[label > 0] = 1

            pred[pred < 5] = 0
            pred[pred > 0] = 1
            ######

            scores_dict = model_metrics.update_scores(label, pred)

            if ix % args.log_interval == 0:
                # log the metrics to tensorboard X, track best model according to current weighted average accuracy
                logger.log(model,
                           optimizer,
                           loss.item(),
                           track_score=scores_dict['weighted_acc'] /
                           model_metrics.bn,
                           scores_dict=scores_dict,
                           epoch=epoch,
                           bn=model_metrics.bn,
                           batches_per_epoch=model_metrics.batches_per_epoch)
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, model_metrics.bn, model_metrics.batches_per_epoch,
                    (model_metrics.bn / model_metrics.batches_per_epoch) * 100,
                    loss.item()))

        # print whole epoch's training accuracy; useful for monitoring overfitting
        print('Train Accuracy: ({:.0f}%)'.format(model_metrics.w_accuracy *
                                                 100))

        if evaluate:
            print('\n================== VALIDATION ==================')
            model.eval()  # set model to evaluate mode

            # set up validation metrics we want to track
            val_loss = 0.
            val_correct = 0
            val_num = len(val_loader.sampler)

            # disable autograd here (replaces volatile flag from v0.3.1 and earlier)
            with torch.no_grad():
                # loop over validation batches
                for img, label in val_loader:
                    img, label = img.to(device), label.to(
                        device)  # get data, send to gpu if needed
                    output = model(img)  # forward pass

                    # sum up batch loss
                    val_loss += F.cross_entropy(output,
                                                label,
                                                size_average=False).item()

                    # monitor for accuracy
                    pred = output.max(
                        1, keepdim=True)[1]  # get the index of the max logit
                    val_correct += pred.eq(
                        label.view_as(pred)).sum().item()  # add to total hits

            # update current evaluation metrics
            val_loss /= val_num
            val_acc = 100. * val_correct / val_num
            print(
                '\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'
                .format(val_loss, val_correct, val_num, val_acc))

            # check if best model according to accuracy;
            # if so, replace best metrics
            is_best = val_acc > best_val_acc
            if is_best:
                best_val_acc = val_acc
                best_val_loss = val_loss  # note this is val_loss of best model w.r.t. accuracy,
                # not the best val_loss throughout training

            # create checkpoint dictionary and save it;
            # if is_best, copy the file over to the file containing best model for this run
            state = {
                'epoch': epoch,
                'model': args.model,
                'state_dict': model.state_dict(),
                'optimizer_state': optimizer.state_dict(),
                'val_loss': val_loss,
                'best_val_loss': best_val_loss,
                'val_acc': val_acc,
                'best_val_acc': best_val_acc
            }
            save_checkpoint(state, is_best, checkpoint_file)

    print('\n================== TESTING ==================')
    # load best model from training run (according to validation accuracy)
    check = torch.load(logger.best_path)
    model.load_state_dict(check['state_dict'])
    model.eval()  # set model to evaluate mode

    # set up evaluation metrics we want to track
    test_loss = 0.
    test_correct = 0
    test_num = len(test_loader.sampler)

    test_metrics = CalculateMetrics(batch_size=args.batch_size,
                                    batches_per_epoch=test_num)
    # disable autograd here (replaces volatile flag from v0.3.1 and earlier)
    with torch.no_grad():
        for img, label in test_loader:
            img, label = img.to(device), label.to(device)
            output = model(img)
            # sum up batch loss
            test_loss += F.cross_entropy(output, label,
                                         size_average=False).item()
            pred = output.max(
                1, keepdim=True)[1]  # get the index of the max logit
            test_scores = test_metrics.update_scores(label, pred)
            logger.log(model,
                       optimizer,
                       test_loss,
                       test_scores['weighted_acc'],
                       test_scores,
                       phase='test')

    test_loss /= test_num
    print('Test set: Average loss: {:.4f}, Accuracy: ({:.0f}%)\n'.format(
        test_loss, test_metrics.w_accuracy * 100))

    print('Final model stored at "{}".'.format(checkpoint_file +
                                               '-best.pth.tar'))
Example #22
        'total_isolates':
        "{}{}/figures/total_isolates.png".format(save_path, drug),
        'Mean_MIC_trend_with_sd':
        "{}{}/figures/MIC_Trend_with_SD.png".format(save_path, drug),
        'error_path':
        "/home/rossco/Documents/web_projects/microbiology_data_portal/public/img/broken_robot.png"
    }

    if drug:
        if os.path.exists(save_path):
            if os.path.exists('{}{}/'.format(save_path, drug)):
                #Load previous results
                print(json.dumps(shred_string(data_locations)))
                sys.exit()
            else:
                mkdir_p("{}{}/".format(save_path, drug))
                mkdir_p("{}{}/figures/".format(save_path, drug))
                print(
                    json.dumps(
                        shred_string(
                            create_figures(myargs, save_path, pickle_file,
                                           start_date, end_date))))
        else:
            mkdir_p(save_path)
            mkdir_p("{}{}/".format(save_path, drug))
            mkdir_p("{}{}/figures/".format(save_path, drug))
            client = pymongo.MongoClient()
            extract = ExtractData(db_name=dbname, mongo_client=client)
            bug_data = extract.get_mic_data(organism=bug)
            file_name = '{}.pickle'.format(bug)
            extract.to_pickle(mic_data=bug_data,
Example #23
 def createtarball(self):
   mkdir_p(os.path.dirname(self.foreostarball))
   return "making a phantom tarball is not automated, you have to make it yourself and put it in {}".format(self.foreostarball)
Example #24
  def cardsurl(self):
    def getcontents(f):
      contents = ""
      for line in f:
        line = line.split("!")[0]
        line = line.split("#")[0]
        line = line.strip()
        line = re.sub(" *= *", " = ", line)
        if not line: continue
        if line.startswith("define p = "): continue
        if line.startswith("define j = "): continue
        contents += line+"\n"
      return contents

    gitcardcontents = []
    if self.madgraphcardscript is None:
      cardurls = tuple(
        os.path.join(
          "https://raw.githubusercontent.com/cms-sw/genproductions/",
          self.genproductionscommit,
          (_[0] if len(_) == 2 else _).replace(genproductions+"/", "")
        ) for _ in self.madgraphcards
      )
      with cdtemp():
        for cardurl in cardurls:
          wget(cardurl)
          with open(os.path.basename(cardurl)) as f:
            gitcardcontents.append(getcontents(f))
    else:
      scripturls = tuple(
        os.path.join(
          "https://raw.githubusercontent.com/cms-sw/genproductions/",
          self.genproductionscommit,
          _.replace(genproductions+"/", "")
        ) for _ in self.madgraphcardscript
      )
      with cdtemp():
        wget(scripturls[0])
        for _ in scripturls[1:]:
          relpath = os.path.relpath(os.path.dirname(_), os.path.dirname(scripturls[0]))
          assert ".." not in relpath, relpath
          mkdir_p(relpath)
          with cd(relpath):
            wget(_)
        subprocess.check_call(["chmod", "u+x", os.path.basename(scripturls[0])])
        try:
          subprocess.check_output(["./"+os.path.basename(scripturls[0])], stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
          print e.output
          raise
        for _ in self.madgraphcards:
          if len(_) == 2: _ = _[0]
          with open(_) as f:
            gitcardcontents.append(getcontents(f))


    with cdtemp():
      subprocess.check_output(["tar", "xvaf", self.cvmfstarball])
      if glob.glob("core.*"):
        raise ValueError("There is a core dump in the tarball\n{}".format(self))
      cardnamesintarball = tuple(
        os.path.join("InputCards", os.path.basename(_[1] if len(_) == 2 else _))
        for _ in self.madgraphcards
      )
      cardcontents = []
      for cardnameintarball in cardnamesintarball:
        try:
          with open(cardnameintarball) as f:
            cardcontents.append(getcontents(f))
        except IOError:
          raise ValueError("no "+cardnameintarball+" in the tarball\n{}".format(self))
      for _ in glob.iglob("InputCards/*"):
        if _ not in cardnamesintarball and not _.endswith(".tar.gz") and _ not in self.otherthingsininputcards:
          raise ValueError("unknown thing "+_+" in InputCards\n{}".format(self))

    for name, cc, gcc in itertools.izip(cardnamesintarball, cardcontents, gitcardcontents):
      _, suffix = os.path.splitext(os.path.basename(name))
      if cc != gcc:
        with cd(here):
          with open("cardcontents"+suffix, "w") as f:
            f.write(cc)
          with open("gitcardcontents"+suffix, "w") as f:
            f.write(gcc)
        raise ValueError(name + " in tarball != " + name + " in git\n{}\nSee ./cardcontents{} and ./gitcardcontents{}".format(self, suffix, suffix))

    if self.madgraphcardscript:
      return "\n#    ".join((scripturls[0],) + tuple(self.madgraphcards))
    else:
      return "\n# ".join(cardurls)