def create_structure(self):
    """
    Creates the necessary model input data in the database to ingest against.

    :return: An integer representing the id of the replication created.
    """
    myrun = DimRun()
    myrun.name = "AUTO SCOTTY TEST RUN"
    myrun.models_key_id = 1
    myrun.save()

    mychannel = DimChannel.objects.get(pk=228441)
    myrun.dimchannel_set.add(mychannel)

    myex = DimExecution()
    myex.run_key_id = myrun.id
    myex.save()

    myrep = DimReplication()
    myrep.execution_key_id = myex.id
    myrep.seed_used = 99
    myrep.series_id = 100
    myrep.save()

    self.model_input.append([myrun.id, myex.id, myrep.id, mychannel.id])

    return myrep.id
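# Illustrative only: the zip name and calling code below are assumptions, not part
# of this module. A test would typically use the replication id returned by
# create_structure() to build a zip whose name follows the
# "<name>-<replication_id>.zip" convention that the ingester parses:
#
#     rep_id = self.create_structure()
#     test_zip = "autoscottytest-%s.zip" % rep_id
#     shutil.copyfile("autoscottytest.zip", test_zip)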
def __init__(self, zip_file=None, execid=None, seriesid=None, chan_list=None, msg=None):
    """
    This init method takes several arguments and uses them to ingest data into the
    vecnet-CI data warehouse.

    It takes a zip file (the path to the zip file), execid, seriesid, and chan_list.
    Given this information, the ingester will create a replication and then fill that
    replication with the data in the zip file. If execid and/or seriesid are not
    specified, the filename is assumed to be of the form "REPID-XXXX.zip" and the
    replication id is parsed from the file name.

    :param str zip_file: A path to a zip file on the file system.
    :raises TypeError: When any file address is not of type str.
    :raises ObjectDoesNotExist: When the replication id given does not exist in the
        data warehouse.
    """
    self.FileList = dict()
    self.DataList = dict()
    self.alreadyIngested = None

    # the files needed by EMOD
    self.FILES_OF_INTEREST = ('DemographicsSummary', 'VectorSpeciesReport', 'InsetChart', 'BinnedReport')

    # create a unique temporary directory for the needed files
    self.BASE_PATH = os.path.sep + 'tmp'
    self.temporary_path = self.BASE_PATH + os.path.sep + str(uuid.uuid4())

    # get the replication ID from the filename. filename is expected to be of the following format
    # path/to/file/file_name-replication_number.zip
    if not execid and not seriesid:
        replication_id = int(os.path.basename(zip_file).strip(".zip").split("-")[1])
        try:
            self.replication = DimReplication.objects.get(pk=replication_id)
        except DimReplication.DoesNotExist:
            raise ObjectDoesNotExist('No replication with id %s is available in the DB' % replication_id)
    else:
        execution = DimExecution.objects.filter(pk=execid)
        if not execution.exists():
            raise ObjectDoesNotExist("No execution with id %s is available in the DB" % execid)
        replication = execution[0].dimreplication_set.filter(series_id=seriesid)
        if not replication.exists():
            replication = DimReplication(
                execution_key=execution[0],
                series_id=seriesid,
                seed_used=-1
            )
            replication.save()
            self.replication = replication
        else:
            self.replication = replication[0]

    if zip_file is None:
        if msg is None:
            msg = "An error has occurred during the processing of this replication"
        self.set_status(-1, msg)
        return

    input_files = self.unpack_files(zip_file)

    VSRF = input_files['VectorSpeciesReport'] if 'VectorSpeciesReport' in input_files else ''
    DSF = input_files['DemographicsSummary'] if 'DemographicsSummary' in input_files else ''
    ICF = input_files['InsetChart'] if 'InsetChart' in input_files else ''
    BRF = input_files['BinnedReport'] if 'BinnedReport' in input_files else ''

    if not isinstance(VSRF, str):
        raise TypeError('VSRF must be a string containing the file address to the VSRF')
    elif VSRF != '':
        self.FileList['VectorSpeciesReport'] = VSRF

    if not isinstance(DSF, str):
        raise TypeError('DSF must be a string containing the file address to the DSF')
    elif DSF != '':
        self.FileList['DemographicsSummary'] = DSF

    if not isinstance(ICF, str):
        raise TypeError('ICF must be a string containing the file address to the ICF')
    elif ICF != '':
        self.FileList['InsetChart'] = ICF

    if not isinstance(BRF, str):
        raise TypeError('BRF must be a string containing the file address to the BRF')
    elif BRF != '':
        self.FileList['BinnedReport'] = BRF

    # -------------- Grab the channel listing and other objects needed for the ingester
    self.run = self.replication.execution_key.run_key

    # Attach CSV output for non-sweep runs only
    if self.run.numjobs() == 1:
        convert_to_csv(self.temporary_path)
        filename = os.path.join(self.temporary_path, "output.csv.zip")
        csv_file = SimulationInputFile.objects.create_file(contents=open(filename, "rb").read(),
                                                           name="output.csv.zip",
                                                           created_by_id=1)
        self.run.csv_output = csv_file
        self.run.save()

    if self.run.models_key.model != 'EMOD':
        raise ValueError("Target Run is not EMOD during EMOD submission!")

    if chan_list is not None and isinstance(chan_list, list):
        chans = DimChannel.objects.filter(pk__in=chan_list)
        if not chans.exists():
            raise ObjectDoesNotExist('No channels were found with IDs %s' % chan_list)
        new_chans = [self.run.dimchannel_set.add(channel) for channel in chans]
        if len(new_chans) != len(chans):
            print "WARNING: Not all channels in list were ingested"
        self.run.save()
    else:
        chans = self.run.dimchannel_set.all()

    # index the run's channels by the output file they come from
    self.Channel_dict = dict()
    for channel in chans:
        if channel.file_name not in self.Channel_dict:
            self.Channel_dict[channel.file_name] = list()
        self.Channel_dict[channel.file_name].append(channel)
    self.Channel_dict['VectorSpeciesReport'] = list()

    # channels already present in the warehouse for this run/replication
    self.alreadyIngested = BaseFactData.objects.filter(
        run_key=self.run,
        replication_key=self.replication
    ).distinct('channel_key')
    self.alreadyIngested = [data.channel_key for data in self.alreadyIngested]

    # Setup class variables
    self.fact_data_name = 'fact_data_run_%s' % self.run.id
    self.cursor = connections['default'].cursor()
    self.cursor.execute("select nextval('base_fact_data_id_seq');")
    self.next_id = int(self.cursor.fetchone()[0])
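# Sketch of how this constructor might be driven; the class name "EMODIngester" is
# assumed for illustration (only the __init__ signature above comes from this
# module). Either pass a zip whose basename ends in "-<replication_id>.zip", or
# supply execid/seriesid so the replication is looked up (or created) directly:
#
#     ingester = EMODIngester(zip_file="/tmp/autoscottytest-1234.zip")
#     ingester = EMODIngester(zip_file="/tmp/output.zip", execid=42, seriesid=7,
#                             chan_list=[228441])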
def handle(self, *args, **options):
    print "BEGIN TEST"

    # Create execution, replication, and run
    myrun = DimRun()
    myrun.name = "AUTO SCOTTY TEST RUN"
    myrun.models_key_id = 1
    myrun.save()

    mychannel = DimChannel.objects.get(pk=228441)
    myrun.dimchannel_set.add(mychannel)

    myex = DimExecution()
    myex.run_key_id = myrun.id
    myex.save()

    myrep = DimReplication()
    myrep.execution_key_id = myex.id
    myrep.seed_used = 99
    myrep.series_id = 100
    myrep.save()

    replication_number = myrep.id
    print "REPLICATION NUMBER", replication_number
    print "CHANNEL NUMBER", mychannel.id
    print "RUN NUMBER", myrun.id
    print "EXECUTION NUMBER", myex.id

    # copy the test zip file under a name that carries the new replication number
    oldfilename = options['filename']
    oldlist = oldfilename.split(".")
    newfilename = oldlist[0] + "-" + str(replication_number) + "." + oldlist[1]
    shutil.copyfile(oldfilename, newfilename)

    # make sure this replication output data doesn't already exist
    results = [i for i in BaseFactData.objects.filter(replication_key_id=replication_number,
                                                      channel_key_id=mychannel.id,
                                                      run_key_id=myrun.id)]
    count = len(results)
    if count != 0:
        print "There currently exists data in BaseFactData for the chosen replication, channel, and run. " \
              "Please choose a replication, channel, and run which have not previously been ingested."

    # POST the files to AutoScotty
    print "Beam us up, Scotty.\nAye, Sir."
    print "File: ", newfilename
    f = open(newfilename, 'rb')
    files = {'zip_file': f}
    urlname = options['urlname']
    model_type = options['modeltype']
    if options['hashname']:
        r = requests.post(urlname, files=files,
                          data={'model_type': model_type, 'sync': True, 'zip_file_hash': options['hashname']})
    else:
        myhash = hashlib.sha1()
        myhash.update(f.read())
        f.seek(0)
        r = requests.post(urlname, files=files,
                          data={'model_type': model_type, 'sync': True, 'zip_file_hash': myhash.hexdigest()})

    # close and remove the temporary copy of the file
    f.close()
    os.remove(newfilename)

    if r.status_code != 200:
        print "There was an ingestion error at the server. The HTTP response code is: ", r.status_code
        print "Please check the celery and apache logs on the server to determine the source of the error."

    # fetch the ingested results
    results = BaseFactData.objects.filter(replication_key_id=replication_number,
                                          channel_key_id=mychannel.id,
                                          run_key_id=myrun.id).aggregate(Count("timestep"))
    count = results['timestep__count']
    if count == 10950:
        print "The anticipated number of entries in BaseFactData was present. The data was successfully ingested."
    else:
        print "The count of " + str(count) + " is not the expected number of entries. Please make sure channel " \
              "228441 exists, and that you used the 'autoscottytest.zip' file."

    # remove the ingested data
    BaseFactData.objects.filter(replication_key_id=replication_number,
                                channel_key_id=mychannel.id,
                                run_key_id=myrun.id).delete()
    myrun.dimchannel_set.remove(mychannel)
    myrep.delete()
    myex.delete()
    myrun.delete()

    # make sure data was deleted
    results = BaseFactData.objects.filter(replication_key_id=replication_number,
                                          channel_key_id=mychannel.id,
                                          run_key_id=myrun.id).aggregate(Count("timestep"))
    count = results['timestep__count']
    if count == 0:
        print "The data was successfully purged."
    else:
        print "There was an error removing the ingested data. There are still entries in BaseFactData " \
              "corresponding to your replication, channel, and run. Please see the server logs and the database " \
              "administrator for assistance."

    print "Live long and prosper."
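# Illustrative invocation only; the management command name and exact argument
# names are assumptions (not shown in this module), but handle() reads
# options['filename'], options['urlname'], options['modeltype'], and an optional
# options['hashname']:
#
#     python manage.py test_autoscotty --filename autoscottytest.zip \
#         --urlname http://localhost:8000/autoscotty/upload/ --modeltype EMOD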