Example #1
    def create_structure(self):
        """
        Creates the necessary model input data in the database to ingest against.
        :return: An integer representing the replication created.
        """
        myrun = DimRun()
        myrun.name = "AUTO SCOTTY TEST RUN"
        myrun.models_key_id = 1
        myrun.save()

        mychannel = DimChannel.objects.get(pk=228441)
        myrun.dimchannel_set.add(mychannel)

        myex = DimExecution()
        myex.run_key_id = myrun.id
        myex.save()

        myrep = DimReplication()
        myrep.execution_key_id = myex.id
        myrep.seed_used = 99
        myrep.series_id = 100
        myrep.save()

        self.model_input.append([myrun.id, myex.id, myrep.id, mychannel.id])

        return myrep.id
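The structure created here mirrors the teardown performed at the end of Example #3. A minimal sketch of a matching cleanup helper, assuming it lives on the same test class and that create_structure() has populated self.model_input as shown (the name remove_structure is hypothetical):

    def remove_structure(self):
        """
        Deletes the runs, executions, replications, and channel associations
        recorded in self.model_input by create_structure().
        """
        for run_id, ex_id, rep_id, chan_id in self.model_input:
            run = DimRun.objects.get(pk=run_id)
            # detach the channel before deleting the run, as in Example #3
            run.dimchannel_set.remove(DimChannel.objects.get(pk=chan_id))
            DimReplication.objects.get(pk=rep_id).delete()
            DimExecution.objects.get(pk=ex_id).delete()
            run.delete()
        self.model_input = []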
Example #2
    def __init__(self, zip_file=None, execid=None, seriesid=None, chan_list=None, msg=None):
        """
        This init method will take several arguments and use those to ingest data into the vecnet-CI datawarehouse

        This takes a zip file (the path to the zip file), execid, seriesid, and chan_list.  Given this information,
        this ingester will create a replication, and then fill that replication with the data in the zip file.  If
        execid and/or seriesid are not specified, the filename is assumed to be of the type "REPID-XXXX.zip" where
        the replication id will be parsed from the file name.

        :param str zip_file: A path to a zip file on the file system.
        :raises TypeError: When any file address is not of type str
        :raises ObjectDoesNotExist: When the replication_id given does not exist in the data warehouse
        """
        self.FileList = dict()
        self.DataList = dict()
        self.alreadyIngested = None
        # the files needed by EMOD
        self.FILES_OF_INTEREST = ('DemographicsSummary', 'VectorSpeciesReport', 'InsetChart', 'BinnedReport')
        # create a unique temporary directory for the needed files
        self.BASE_PATH = os.path.join(os.path.sep, 'tmp')
        self.temporary_path = os.path.join(self.BASE_PATH, str(uuid.uuid4()))
        # get the replication ID from the filename. filename is expected to be of the following format
        #   path/to/file/file_name-replication_number.zip
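        #   e.g. "REPID-1234.zip" maps to replication id 1234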

        if not execid and not seriesid:
            replication_id = int(os.path.splitext(os.path.basename(zip_file))[0].split("-")[1])
            try:
                self.replication = DimReplication.objects.get(pk=replication_id)
            except DimReplication.DoesNotExist:
                raise ObjectDoesNotExist('No replication with id %s is available in the DB' % replication_id)
        else:
            execution = DimExecution.objects.filter(pk=execid)
            if not execution.exists():
                raise ObjectDoesNotExist("No execution with id %s is available in the DB" % execid)
            replication = execution[0].dimreplication_set.filter(series_id=seriesid)
            if not replication.exists():
                replication = DimReplication(
                    execution_key=execution[0],
                    series_id=seriesid,
                    seed_used=-1
                )
                replication.save()
                self.replication = replication
            else:
                self.replication = replication[0]

        if zip_file is None:
            if msg is None:
                msg = "An error has occurred during the processing of this replication"
            self.set_status(-1, msg)
            return

        input_files = self.unpack_files(zip_file)
        VSRF = input_files.get('VectorSpeciesReport', '')
        DSF = input_files.get('DemographicsSummary', '')
        ICF = input_files.get('InsetChart', '')
        BRF = input_files.get('BinnedReport', '')

        if not isinstance(VSRF, str):
            raise TypeError('VSRF must be a string containing the file address to the VSRF')
        elif VSRF != '':
            self.FileList['VectorSpeciesReport'] = VSRF
        if not isinstance(DSF, str):
            raise TypeError('DSF must be a string containing the file address to the DSF')
        elif DSF != '':
            self.FileList['DemographicsSummary'] = DSF
        if not isinstance(ICF, str):
            raise TypeError('ICF must be a string containing the file address to the ICF')
        elif ICF != '':
            self.FileList['InsetChart'] = ICF
        if not isinstance(BRF, str):
            raise TypeError('BRF must be a string containing the file address to the BRF')
        elif BRF != '':
            self.FileList['BinnedReport'] = BRF

        # -------------- Grab the channel listing and other objects needed for the ingester
        self.run = self.replication.execution_key.run_key

        # Attach CSV output for non-sweep runs only
        if self.run.numjobs() == 1:
            convert_to_csv(self.temporary_path)
            filename = os.path.join(self.temporary_path, "output.csv.zip")
            with open(filename, "rb") as csv_zip:
                csv_file = SimulationInputFile.objects.create_file(contents=csv_zip.read(),
                                                                   name="output.csv.zip",
                                                                   created_by_id=1)

            self.run.csv_output = csv_file
            self.run.save()

        if self.run.models_key.model != 'EMOD':
            raise ValueError("Target Run is not EMOD during EMOD submission!")
        if chan_list is not None and isinstance(chan_list, list):
            chans = DimChannel.objects.filter(pk__in=chan_list)
            if not chans.exists():
                raise ObjectDoesNotExist('No channels were found with IDs %s' % chan_list)
            for channel in chans:
                self.run.dimchannel_set.add(channel)
            if chans.count() != len(chan_list):
                print "WARNING: Not all channels in list were ingested"
            self.run.save()
        else:
            chans = self.run.dimchannel_set.all()

        self.Channel_dict = dict()
        for channel in chans:
            if channel.file_name not in self.Channel_dict:
                self.Channel_dict[channel.file_name] = list()
            self.Channel_dict[channel.file_name].append(channel)
        self.Channel_dict['VectorSpeciesReport'] = list()

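        # channels for which fact data already exists for this run and replication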
        self.alreadyIngested = BaseFactData.objects.filter(
            run_key=self.run,
            replication_key=self.replication
        ).distinct('channel_key')

        self.alreadyIngested = [data.channel_key for data in self.alreadyIngested]
        # self.Channel_list = [x.file_name for x in chans]

        # Setup class variables
        self.fact_data_name = 'fact_data_run_%s' % self.run.id

        self.cursor = connections['default'].cursor()

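        # reserve the next value from the base_fact_data id sequence (presumably so ids
        # can be assigned manually when fact rows are inserted later in the ingestion)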
        self.cursor.execute("select nextval('base_fact_data_id_seq');")
        self.next_id = int(self.cursor.fetchone()[0])
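As the docstring explains, the ingester can be driven in two ways: by a zip file whose name encodes the replication id, or by an explicit execution id and series id. A minimal usage sketch, assuming the class above is named EmodZipIngester (the real class name is not shown) and that the ids and paths are placeholders:

    # Mode 1: replication id parsed from the file name ("REPID-XXXX.zip")
    ingester = EmodZipIngester(zip_file="/tmp/REPID-4521.zip")

    # Mode 2: replication looked up (or created with seed_used=-1) under the
    # given execution and series, restricted to specific DimChannel primary keys
    ingester = EmodZipIngester(
        zip_file="/tmp/output.zip",
        execid=17,
        seriesid=3,
        chan_list=[228441],
    )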
Example #3
    def handle(self, *args, **options):
        print "BEGIN TEST"
        # Create execution, replication, and run
        myrun = DimRun()
        myrun.name = "AUTO SCOTTY TEST RUN"
        myrun.models_key_id = 1
        myrun.save()

        mychannel = DimChannel.objects.get(pk=228441)
        myrun.dimchannel_set.add(mychannel)

        myex = DimExecution()
        myex.run_key_id = myrun.id
        myex.save()

        myrep = DimReplication()
        myrep.execution_key_id = myex.id
        myrep.seed_used = 99
        myrep.series_id = 100
        myrep.save()

        replication_number = myrep.id
        print "REPLICATION NUMBER", replication_number
        print "CHANNEL NUMBER", mychannel.id
        print "RUN NUMBER", myrun.id
        print "EXECUTION NUMBER", myex.id
        # copy the test zip file under a name that encodes the replication id
        oldfilename = options['filename']
        base, ext = os.path.splitext(oldfilename)
        newfilename = base + "-" + str(replication_number) + ext
        shutil.copyfile(oldfilename, newfilename)
        # make sure this replication output data doesn't already exist
        count = BaseFactData.objects.filter(replication_key_id=replication_number,
                                            channel_key_id=mychannel.id, run_key_id=myrun.id).count()
        if count != 0:
            print "There currently exists data in BaseFactData for the chosen replication, channel, and run. " \
                  "Please choose a replication, channel, and run which have not previously been ingested."

        # Curl the files to AutoScotty
        print "Beam us up, Scotty.\nAye, Sir."
        print "File: ", newfilename
        f = open(newfilename, 'rb')
        files = {'zip_file': f}
        urlname = options['urlname']
        model_type = options['modeltype']
        if options['hashname']:
            r = requests.post(urlname, files=files, data={'model_type': model_type, 'sync': True,
                                                          'zip_file_hash': options['hashname']})
        else:
            myhash = hashlib.sha1()
            myhash.update(f.read())
            f.seek(0)
            r = requests.post(urlname, files=files, data={'model_type': model_type, 'sync': True,
                                                          'zip_file_hash': myhash.hexdigest()})

        # close the upload handle and remove the temporary copy of the file
        f.close()
        os.remove(newfilename)

        if r.status_code != 200:
            print "There was an ingestion error at the server. The HTTP Response code is: ", r.status_code
            print "Please check the celery and apache logs on the server to determine the source of the error."

        # fetch the ingested results
        results = BaseFactData.objects.filter(replication_key_id=replication_number, channel_key_id=mychannel.id,
                                              run_key_id=myrun.id).aggregate(Count("timestep"))
        count = results['timestep__count']

        if count == 10950:
            print "The anticipated number of entries in BaseFactData were present. The data was successfully ingested."
        else:
            print "The count of " + str(count) + " is not the expected number of entries. Please make sure channel " \
                                                 "228441 exists, and that you used the 'autoscottytest.zip' file."

        # remove the ingested data
        BaseFactData.objects.filter(replication_key_id=replication_number, channel_key_id=mychannel.id,
                                    run_key_id=myrun.id).delete()
        myrun.dimchannel_set.remove(mychannel)
        myrep.delete()
        myex.delete()
        myrun.delete()

        # make sure data was deleted
        results = BaseFactData.objects.filter(replication_key_id=replication_number, channel_key_id=mychannel.id,
                                              run_key_id=myrun.id).aggregate(Count("timestep"))
        count = results['timestep__count']
        if count == 0:
            print "The data was successfully purged."
        else:
            print "There was an error removing the ingested data.  There are still entries in BaseFactData " \
                  "corresponding to your replication, channel, and run. Please see the server logs and the database " \
                  "administrator for assistance."

        # return response
        print "Live long and prosper."