Example #1
def handle(self, *args, **options):
        skip_row = False
        frequency = options['frequency']
        if frequency not in ("daily", "weekly"):
            return self.stdout.write("Frequency argument must be either 'daily' or 'weekly'")

        silos = Silo.objects.filter(
            unique_fields__isnull=False,
            reads__autopull=True,
            reads__autopull_frequency__isnull=False,
            reads__autopull_frequency=frequency).distinct()
        read_type = ReadType.objects.get(read_type="JSON")
        for silo in silos:
            reads = silo.reads.filter(type=read_type.pk)
            for read in reads:
                ona_token = ThirdPartyTokens.objects.get(user=silo.owner.pk, name="ONA")
                response = requests.get(read.read_url, headers={'Authorization': 'Token %s' % ona_token.token})
                data = json.loads(response.content)

                # import data into this silo
                num_rows = len(data)
                if num_rows == 0:
                    continue

                counter = None
                # loop over the data, setting create dates as each row is saved
                for counter, row in enumerate(data):
                    skip_row = False
                    #if the value of unique column is already in existing_silo_data then skip the row
                    for unique_field in silo.unique_fields.all():
                        filter_criteria = {'silo_id': silo.pk, unique_field.name: row[unique_field.name]}
                        if LabelValueStore.objects.filter(**filter_criteria).count() > 0:
                            skip_row = True
                            break
                    if skip_row:
                        continue
                    # at this point, the unique column value is not in existing data so append it.
                    lvs = LabelValueStore()
                    lvs.silo_id = silo.pk
                    for new_label, new_value in row.iteritems():
                        if new_label not in ("", None, "edit_date", "create_date"):
                            setattr(lvs, new_label, new_value)
                    lvs.create_date = timezone.now()
                    lvs.save()

                if num_rows == (counter+1):
                    combineColumns(silo.pk)

                self.stdout.write('Successfully fetched the READ_ID, "%s", from ONA' % read.pk)
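A command like this is normally wired to cron so autopulls run on schedule. A minimal invocation sketch, assuming the command module is registered as pull_ona_data (a hypothetical name; the listing does not show the file name) and that frequency is declared in the command's argument parser:

# Usage sketch only. "pull_ona_data" is a hypothetical command name;
# substitute the real module name under management/commands/.
from django.core.management import call_command

call_command("pull_ona_data", frequency="daily")  # or frequency="weekly"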
Example #2
def handle(self, *args, **options):
        silo = None
        read = None
        silo_id = options['silo_id']
        username = options['username']
        user = User.objects.get(username__exact=username)
        reads = Read.objects.filter(owner=user)

        try:
            silo = Silo.objects.get(pk=silo_id)
        except Silo.DoesNotExist:
            raise CommandError('Silo "%s" does not exist' % silo_id)

        for read_id in options['read_ids']:
            try:
                read = reads.get(pk=read_id)
            except Read.DoesNotExist:
                raise CommandError('Read "%s" does not exist for user %s' % (read_id, user.username))

            # Fetch the data from ONA
            ona_token = ThirdPartyTokens.objects.get(user=user.pk, name="ONA")
            response = requests.get(read.read_url, headers={'Authorization': 'Token %s' % ona_token.token})
            data = json.loads(response.content)

            # import data into this silo
            num_rows = len(data)
            if num_rows == 0:
                continue

            counter = None
            # loop over the data, setting create dates as each row is saved
            for counter, row in enumerate(data):
                lvs = LabelValueStore()
                lvs.silo_id = silo.pk
                for new_label, new_value in row.iteritems():
                    if new_label not in ("", None, "edit_date", "create_date"):
                        setattr(lvs, new_label, new_value)
                lvs.create_date = timezone.now()
                lvs.save()

            if num_rows == (counter+1):
                combineColumns(silo_id)

            self.stdout.write('Successfully fetched the READ_ID, "%s", from database' % read_id)
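This second command imports specific reads on demand rather than on a schedule. A matching invocation sketch, again with a hypothetical command name:

# Usage sketch; "import_read_data" is a hypothetical command name, and the
# option names mirror the options dict accessed above.
from django.core.management import call_command

call_command("import_read_data", silo_id=1, username="demo", read_ids=[2, 3])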
Example #3
def saveDataToSilo(silo, data):
    """
    This saves data to the silo

    Keyword arguments:
    silo -- the silo object, which is metadata for its label_value_store
    data -- a Python list of dictionaries, stored in MongoDB
    """
    unique_fields = silo.unique_fields.all()
    skipped_rows = set()
    counter = 0  # count of rows saved
    for row in data:
        # resetting filter_criteria for each row
        filter_criteria = {}
        for uf in unique_fields:
            try:
                filter_criteria.update({str(uf.name): str(row[uf.name])})
            except KeyError:
                # when this exception occurs, it means that the col identified
                # as the unique_col is not present in the fetched dataset
                pass

        # if filter_criteria is set, then update it with current silo_id
        # else set filter_criteria to some non-existent key and value so
        # that it triggers a DoesNotExist exception in order to create a new
        # document instead of updating an existing one.
        if filter_criteria:
            filter_criteria.update({'silo_id': silo.id})
        else:
            filter_criteria.update(
                {"nonexistentkey": "NEVER0101010101010NEVER"})

        try:
            lvs = LabelValueStore.objects.get(**filter_criteria)
            #print("updating")
            setattr(lvs, "edit_date", timezone.now())
        except LabelValueStore.DoesNotExist as e:
            lvs = LabelValueStore()
            lvs.silo_id = silo.pk
            lvs.create_date = timezone.now()
            #print("creating")
        except LabelValueStore.MultipleObjectsReturned as e:
            for k, v in filter_criteria.iteritems():
                skipped_rows.add("%s=%s" % (k, v))
            #print("skipping")
            continue

        # set the fields in the current document and save it
        for key, val in row.iteritems():
            if key == "" or key is None or key == "silo_id": continue
            elif key == "id" or key == "_id": key = "user_assigned_id"
            elif key == "edit_date": key = "editted_date"
            elif key == "create_date": key = "created_date"
            if isinstance(val, (str, unicode)):
                val = smart_str(val, strings_only=True)
            setattr(lvs, key.replace(".", "_").replace("$", "USD"), val)
        lvs.save()
        counter += 1

    combineColumns(silo.pk)
    res = {"skipped_rows": skipped_rows, "num_rows": counter}
    return res
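saveDataToSilo is an upsert keyed on the silo's unique fields: a row whose unique-field values match an existing document updates it, anything else inserts a new document, and rows matching more than one document are skipped. A minimal calling sketch (the Silo import path is an assumption; the listing does not show it):

# Usage sketch. Adjust the Silo import path to the project's layout.
from silo.models import Silo

silo = Silo.objects.get(pk=1)
rows = [
    {"id": "101", "name": "site A", "amount": "12.5"},
    {"id": "102", "name": "site B", "amount": "7.0"},
]
res = saveDataToSilo(silo, rows)
print("saved %s rows, skipped %s" % (res["num_rows"], len(res["skipped_rows"])))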
Example #4
def saveDataToSilo(silo, data):
    """
    This saves data to the silo

    Keyword arguments:
    silo -- the silo object, which is metadata for its label_value_store
    data -- a Python list of dictionaries, stored in MongoDB
    """
    unique_fields = silo.unique_fields.all()
    skipped_rows = set()
    counter = 0  # count of rows saved
    for row in data:
        # resetting filter_criteria for each row
        filter_criteria = {}
        for uf in unique_fields:
            try:
                filter_criteria.update({str(uf.name): str(row[uf.name])})
            except KeyError:
                # when this exception occurs, it means that the col identified
                # as the unique_col is not present in the fetched dataset
                pass

        # if filter_criteria is set, then update it with current silo_id
        # else set filter_criteria to some non-existent key and value so
        # that it triggers a DoesNotExist exception in order to create a new
        # document instead of updating an existing one.
        if filter_criteria:
            filter_criteria.update({"silo_id": silo.id})
        else:
            filter_criteria.update({"nonexistentkey": "NEVER0101010101010NEVER"})

        try:
            lvs = LabelValueStore.objects.get(**filter_criteria)
            # print("updating")
            setattr(lvs, "edit_date", timezone.now())
        except LabelValueStore.DoesNotExist as e:
            lvs = LabelValueStore()
            lvs.silo_id = silo.pk
            lvs.create_date = timezone.now()
            # print("creating")
        except LabelValueStore.MultipleObjectsReturned as e:
            for k, v in filter_criteria.iteritems():
                skipped_rows.add("%s=%s" % (k, v))
            # print("skipping")
            continue

        # set the fields in the current document and save it
        for key, val in row.iteritems():
            if key == "" or key is None or key == "silo_id":
                continue
            elif key == "id" or key == "_id":
                key = "user_assigned_id"
            elif key == "edit_date":
                key = "editted_date"
            elif key == "create_date":
                key = "created_date"
            if isinstance(val, (str, unicode)):
                val = smart_str(val, strings_only=True)
            setattr(lvs, key.replace(".", "_").replace("$", "USD"), val)
        lvs.save()
        counter += 1

    combineColumns(silo.pk)
    res = {"skipped_rows": skipped_rows, "num_rows": counter}
    return res
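The setattr call in both versions rewrites keys because MongoDB has historically rejected "." anywhere in a field name and "$" at the start of one. The same sanitization step in isolation:

# Standalone sketch of the key sanitization used above.
def sanitize_key(key):
    # MongoDB disallows "." in field names and "$"-prefixed names,
    # so both characters are rewritten before setattr().
    return key.replace(".", "_").replace("$", "USD")

print(sanitize_key("rate.usd"))  # rate_usd
print(sanitize_key("$price"))    # USDprice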
Example #5
def save_data_to_silo(silo, data, read=-1, user=None):
    """
    This saves data to the silo

    Keyword arguments:
    silo -- the silo object, which is metadata for its label_value_store
    data -- a Python list of dictionaries, stored in MongoDB
    read -- the read object, optional only for backwards compatibility
    user -- an optional parameter to use if it's necessary to retrieve
            from ThirdPartyTokens
    """
    try:
        if read.type.read_type == "ONA" and user:
            saveOnaDataToSilo(silo, data, read, user)
        read_source_id = read.id
    except AttributeError:
        read_source_id = read
    unique_fields = silo.unique_fields.all()
    skipped_rows = set()
    counter = 0
    keys = []
    try:
        keys = data.fieldnames
        keys = [cleanKey(key) for key in keys]
    except AttributeError as e:
        logger.warning(e)

    for counter, row in enumerate(data):
        # resetting filter_criteria for each row
        filter_criteria = {}
        for uf in unique_fields:
            try:
                filter_criteria.update({str(uf.name): str(row[uf.name])})
            except KeyError as e:
                # when this exception occurs, it means that the col identified
                # as the unique_col is not present in the fetched dataset
                logger.info(e)

        # if filter_criteria is set, then update it with current silo_id
        # else set filter_criteria to some non-existent key and value so
        # that it triggers a DoesNotExist exception in order to create a new
        # document instead of updating an existing one.
        if filter_criteria:
            filter_criteria.update({'silo_id': silo.id})

            try:
                lvs = LabelValueStore.objects.get(**filter_criteria)
                setattr(lvs, "edit_date", timezone.now())
                lvs.read_id = read_source_id
            except LabelValueStore.DoesNotExist:
                lvs = LabelValueStore()
                lvs.silo_id = silo.pk
                lvs.create_date = timezone.now()
                lvs.read_id = read_source_id
            except LabelValueStore.MultipleObjectsReturned:
                for k, v in filter_criteria.iteritems():
                    skipped_rows.add("{}={}".format(str(k), str(v)))
                continue
        else:
            lvs = LabelValueStore()

            lvs.silo_id = silo.pk
            lvs.create_date = timezone.now()
            lvs.read_id = read_source_id

        row = clean_data_obj(row)

        for key, val in row.iteritems():
            if not isinstance(key, tuple):
                if key not in keys:
                    keys.append(key)
                setattr(lvs, key, val)

        counter += 1
        lvs = calculateFormulaCell(lvs, silo)
        lvs.save()

    addColsToSilo(silo, keys)
    res = {"skipped_rows": skipped_rows, "num_rows": counter}
    return res
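Because save_data_to_silo first tries data.fieldnames, data can be a csv.DictReader as well as a plain list of dictionaries. A sketch of the CSV path (the file name is an assumption, and silo is obtained as in the earlier sketch):

# Usage sketch: feeding a CSV upload through save_data_to_silo.
import csv

with open("upload.csv") as f:  # hypothetical file
    reader = csv.DictReader(f)  # has .fieldnames; yields one dict per row
    res = save_data_to_silo(silo, reader)  # read defaults to -1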
Example #6
def saveDataToSilo(silo, data, read=-1, user=None):
    """
    This saves data to the silo

    Keyword arguments:
    silo -- the silo object, which is metadata for its label_value_store
    data -- a Python list of dictionaries, stored in MongoDB
    read -- the read object, optional only for backwards compatibility
    user -- an optional parameter to use if it's necessary to retrieve from ThirdPartyTokens
    """
    try:
        if read.type.read_type == "ONA" and user:
            saveOnaDataToSilo(silo, data, read, user)
        read_source_id = read.id
    except AttributeError as e:
        read_source_id = read
    unique_fields = silo.unique_fields.all()
    skipped_rows = set()
    keys = []
    try:
        keys = data.fieldnames
        keys = [cleanKey(key) for key in keys]
    except AttributeError:
        pass
    fieldToType = getColToTypeDict(silo)
    counter = 0  # ensure counter is defined even when data is empty
    for counter, row in enumerate(data):
        # resetting filter_criteria for each row
        filter_criteria = {}
        for uf in unique_fields:
            try:
                filter_criteria.update({str(uf.name): str(row[uf.name])})
            except KeyError as e:
                # when this exception occurs, it means that the col identified
                # as the unique_col is not present in the fetched dataset
                logger.info(e)

        # if filter_criteria is set, then update it with current silo_id
        # else set filter_criteria to some non-existent key and value so
        # that it triggers a DoesNotExist exception in order to create a new
        # document instead of updating an existing one.
        if filter_criteria:
            filter_criteria.update({'silo_id': silo.id})

            try:
                lvs = LabelValueStore.objects.get(**filter_criteria)
                #print("updating")
                setattr(lvs, "edit_date", timezone.now())
                lvs.read_id = read_source_id
            except LabelValueStore.DoesNotExist as e:
                lvs = LabelValueStore()
                lvs.silo_id = silo.pk
                lvs.create_date = timezone.now()
                lvs.read_id = read_source_id
            except LabelValueStore.MultipleObjectsReturned as e:
                for k, v in filter_criteria.iteritems():
                    skipped_rows.add("%s=%s" % (str(k), str(v)))
                #print("skipping")
                continue
        else:
            lvs = LabelValueStore()

            lvs.silo_id = silo.pk
            lvs.create_date = timezone.now()
            lvs.read_id = read_source_id

        row = cleanDataObj(row, silo)

        for key, val in row.iteritems():
            if not isinstance(key, tuple):
                if key not in keys:
                    keys.append(key)
                setattr(lvs, key, val)

        counter += 1
        lvs = calculateFormulaCell(lvs, silo)
        lvs.save()
    addColsToSilo(silo, keys)
    res = {"skipped_rows": skipped_rows, "num_rows": counter}
    return res
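The filter_criteria construction is the heart of the upsert: unique-field values select an existing document, and silo_id scopes the lookup to the current silo. The same logic in isolation, free of the ORM:

# Standalone sketch of the filter_criteria construction used above.
def build_filter_criteria(unique_field_names, row, silo_id):
    criteria = {}
    for name in unique_field_names:
        if name in row:  # mirrors the try/except KeyError above
            criteria[str(name)] = str(row[name])
    if criteria:
        criteria["silo_id"] = silo_id  # scope the lookup to this silo
    return criteria

print(build_filter_criteria(["id"], {"id": 101, "name": "x"}, silo_id=7))
# {'id': '101', 'silo_id': 7}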