예제 #1
0
def save_data_to_silo(silo, data, read=-1, user=None):
    """
    Save rows of data into the silo's label_value_store.

    Keyword arguments:
    silo -- the silo object, which is meta data for its label_value_store
    data -- an iterable of dictionaries (e.g. a csv.DictReader); stored in MongoDB
    read -- the read object, or a plain read id; optional only for backwards
            compatibility
    user -- an optional parameter to use if it is necessary to retrieve
            from ThirdPartyTokens

    Returns a dict with the set of skipped-row criteria and the number of
    rows actually saved (skipped rows are NOT counted).
    """
    try:
        # `read` may be a Read object; ONA reads need extra handling first.
        if read.type.read_type == "ONA" and user:
            saveOnaDataToSilo(silo, data, read, user)
        read_source_id = read.id
    except AttributeError:
        # `read` is a bare id (or the -1 sentinel), not an object.
        read_source_id = read
    unique_fields = silo.unique_fields.all()
    skipped_rows = set()
    # Count only rows that are actually saved; previously `counter` was
    # clobbered by enumerate() each iteration, so skipped rows were still
    # reflected in num_rows.
    counter = 0
    keys = []
    try:
        # csv.DictReader exposes `fieldnames`; other iterables raise here.
        keys = [cleanKey(key) for key in data.fieldnames]
    except AttributeError as e:
        logger.warning(e)

    for row in data:
        # Build the lookup criteria from the silo's unique fields for this row.
        filter_criteria = {}
        for uf in unique_fields:
            try:
                filter_criteria[str(uf.name)] = str(row[uf.name])
            except KeyError as e:
                # The column identified as the unique_col is not present in
                # the fetched dataset; skip that criterion.
                logger.info(e)

        # If filter_criteria is set, try to update an existing document for
        # this silo; otherwise always create a new document.
        if filter_criteria:
            filter_criteria['silo_id'] = silo.id

            try:
                lvs = LabelValueStore.objects.get(**filter_criteria)
                lvs.edit_date = timezone.now()
                lvs.read_id = read_source_id
            except LabelValueStore.DoesNotExist:
                lvs = LabelValueStore()
                lvs.silo_id = silo.pk
                lvs.create_date = timezone.now()
                lvs.read_id = read_source_id
            except LabelValueStore.MultipleObjectsReturned:
                # Ambiguous match: record the criteria and skip this row.
                for k, v in filter_criteria.items():
                    skipped_rows.add("{}={}".format(str(k), str(v)))
                continue
        else:
            lvs = LabelValueStore()

            lvs.silo_id = silo.pk
            lvs.create_date = timezone.now()
            lvs.read_id = read_source_id

        row = clean_data_obj(row)

        for key, val in row.items():
            # Tuple keys cannot be stored as document attributes.
            if not isinstance(key, tuple):
                if key not in keys:
                    keys.append(key)
                setattr(lvs, key, val)

        counter += 1
        lvs = calculateFormulaCell(lvs, silo)
        lvs.save()

    addColsToSilo(silo, keys)
    res = {"skipped_rows": skipped_rows, "num_rows": counter}
    return res
예제 #2
0
def saveDataToSilo(silo, data, read=-1, user=None):
    """
    Save rows of data into the silo's label_value_store.

    Keyword arguments:
    silo -- the silo object, which is meta data for its label_value_store
    data -- an iterable of dictionaries (e.g. a csv.DictReader); stored in MongoDB
    read -- the read object, or a plain read id; optional only for backwards
            compatibility
    user -- an optional parameter to use if it is necessary to retrieve from
            ThirdPartyTokens

    Returns a dict with the set of skipped-row criteria and the number of
    rows actually saved (skipped rows are NOT counted).
    """
    try:
        # `read` may be a Read object; ONA reads need extra handling first.
        if read.type.read_type == "ONA" and user:
            saveOnaDataToSilo(silo, data, read, user)
        read_source_id = read.id
    except AttributeError:
        # `read` is a bare id (or the -1 sentinel), not an object.
        read_source_id = read
    unique_fields = silo.unique_fields.all()
    skipped_rows = set()
    # BUGFIX: `counter` must be initialized before the loop -- it was
    # previously first assigned inside the loop, raising NameError for empty
    # input -- and must not be reset each iteration, which made num_rows
    # always 1 for any non-empty dataset.
    counter = 0
    keys = []
    try:
        # csv.DictReader exposes `fieldnames`; other iterables raise here.
        keys = [cleanKey(key) for key in data.fieldnames]
    except AttributeError:
        pass
    for row in data:
        # Build the lookup criteria from the silo's unique fields for this row.
        filter_criteria = {}
        for uf in unique_fields:
            try:
                filter_criteria[str(uf.name)] = str(row[uf.name])
            except KeyError as e:
                # The column identified as the unique_col is not present in
                # the fetched dataset; skip that criterion.
                logger.info(e)

        # If filter_criteria is set, try to update an existing document for
        # this silo; otherwise always create a new document.
        if filter_criteria:
            filter_criteria['silo_id'] = silo.id

            try:
                lvs = LabelValueStore.objects.get(**filter_criteria)
                lvs.edit_date = timezone.now()
                lvs.read_id = read_source_id
            except LabelValueStore.DoesNotExist:
                lvs = LabelValueStore()
                lvs.silo_id = silo.pk
                lvs.create_date = timezone.now()
                lvs.read_id = read_source_id
            except LabelValueStore.MultipleObjectsReturned:
                # Ambiguous match: record the criteria and skip this row.
                for k, v in filter_criteria.items():
                    skipped_rows.add("%s=%s" % (str(k), str(v)))
                continue
        else:
            lvs = LabelValueStore()

            lvs.silo_id = silo.pk
            lvs.create_date = timezone.now()
            lvs.read_id = read_source_id

        row = cleanDataObj(row, silo)

        for key, val in row.items():
            # Tuple keys cannot be stored as document attributes.
            if not isinstance(key, tuple):
                if key not in keys:
                    keys.append(key)
                setattr(lvs, key, val)

        counter += 1
        lvs = calculateFormulaCell(lvs, silo)
        lvs.save()
    addColsToSilo(silo, keys)
    res = {"skipped_rows": skipped_rows, "num_rows": counter}
    return res