Example #1
File: views.py Project: as3adm/tola
def doMerge(request):
    from_silo_id = request.POST['from_silo_id']
    to_silo_id = request.POST["to_silo_id"]

    try:
        from_silo_id = int(from_silo_id)
        to_silo_id = int(to_silo_id)
    except ValueError as e:
        from_silo_id = None
        to_silo_id = None
        print("The from_silo_id and/or the to_silo_id is not an integer")
    
    #conn = pymongo.Connection()
    #db = conn.tola
    client = MongoClient(uri)
    db = client.tola
    
    if from_silo_id is not None and to_silo_id is not None:
        for k in request.POST:
            if k != "silo_id" and k !=  "_id" and k != "to_silo_id" and k != "from_silo_id" and k != "csrfmiddlewaretoken": 
                from_field = request.POST.getlist(k)[0].lower()
                to_field = request.POST.getlist(k)[1].lower()
            
                if to_field == "Ignore":
                    "This field should be deleted from the silo_id = 'from_silo_id'"
                    #print ("FROM FIELD: %s and SILO_ID: %s" % (from_field, from_silo_id))
                    db.label_value_store.update_many( 
                        { "silo_id": from_silo_id }, 
                        { 
                            "$unset": {from_field: ""}, 
                        }, 
                        upsert=False
                    )
                elif to_field == "0":
                    "Nothing should be done in this case because when the silo_id is updated to to_silo_id this field will become part of the to_silo_id "
                    pass
                else:
                    if from_field != to_field:
                        db.label_value_store.update_many(
                            { "silo_id": from_silo_id }, 
                            { 
                                "$rename": { from_field:  to_field },  
                                "$currentDate": { 'edit_date': True } 
                            }, 
                            upsert=False
                        )

        db.label_value_store.update_many( 
            { "silo_id": from_silo_id }, 
            { 
                "$set": { "silo_id": to_silo_id }, 
            }, 
            upsert=False
        )
        Silo.objects.filter(pk = from_silo_id).delete()
        
        combineColumns(to_silo_id)
    #messages.success(request, "Silos merged successfully")
    return HttpResponseRedirect("/silo_detail/%s" % to_silo_id)
Example #2
def uploadFile(request, id):
    """
    Upload CSV file and save its data
    """
    if request.method == 'POST':
        form = UploadForm(request.POST)
        if form.is_valid():
            read_obj = Read.objects.get(pk=id)
            today = datetime.date.today().strftime('%Y-%m-%d')

            silo = None
            user = User.objects.get(username__exact=request.user)

            if request.POST.get("new_silo", None):
                silo = Silo(name=request.POST['new_silo'], owner=user, public=False, create_date=today)
                silo.save()
            else:
                silo = Silo.objects.get(id = request.POST["silo_id"])

            silo.reads.add(read_obj)
            silo_id = silo.id

            #create object from JSON String
            data = csv.reader(read_obj.file_data)
            labels = None
            try:
                labels = data.next() #First row of CSV should be Column Headers
            except IOError as e:
                messages.error(request, "The CSV file could not be found")
                return HttpResponseRedirect(reverse_lazy('showRead', kwargs={'id': read_obj.id},))

            for row in data:
                lvs = LabelValueStore()
                lvs.silo_id = silo_id
                for col_counter, val in enumerate(row):
                    key = str(labels[col_counter]).replace(".", "_").replace("$", "USD")
                    if key != "" and key is not None and key != "silo_id" and key != "id" and key != "_id":
                        if key == "create_date": key = "created_date"
                        if key == "edit_date": key = "editted_date"
                        setattr(lvs, key, val)
                lvs.create_date = timezone.now()
                lvs.save()
            combineColumns(silo_id)
            return HttpResponseRedirect('/silo_detail/' + str(silo_id) + '/')
        else:
            messages.error(request, "There was a problem with reading the contents of your file" + form.errors)
            #print form.errors

    user = User.objects.get(username__exact=request.user)
    # get all of the silo info to pass to the form
    get_silo = Silo.objects.filter(owner=user)

    # display login form
    return render(request, 'read/file.html', {
        'read_id': id, 'get_silo': get_silo,
    })
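
The key rewriting above (replacing "." with "_" and "$" with "USD") is there because MongoDB field names may not contain dots and may not start with a dollar sign, and the create_date/edit_date renames keep CSV columns from clobbering the store's own bookkeeping fields. A hypothetical standalone helper doing the same normalization:

def sanitize_key(key):
    # MongoDB field names cannot contain "." and cannot begin with "$"
    key = str(key).replace(".", "_").replace("$", "USD")
    # keep imported columns from overwriting LabelValueStore's own date fields
    if key == "create_date":
        key = "created_date"
    elif key == "edit_date":
        key = "editted_date"
    return key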
Example #3
def getJSON(request):
    """
    Get JSON feed info from form then grab data
    """
    if request.method == 'POST':
        # retrieve submitted Feed info from database
        read_obj = Read.objects.get(id = request.POST.get("read_id", None))

        # set date time stamp
        today = datetime.date.today().strftime('%Y-%m-%d')
        try:
            request2 = urllib2.Request(read_obj.read_url)
            # if a username was passed in, get auth info from the form post, then encode it and add it to the request header
            if request.POST['user_name']:
                username = request.POST['user_name']
                password = request.POST['password']
                base64string = base64.encodestring('%s:%s' % (username, password))[:-1]
                request2.add_header("Authorization", "Basic %s" % base64string)
            #retrieve JSON data from formhub via auth info
            json_file = urllib2.urlopen(request2)
        except Exception as e:
            #print e
            messages.error(request, 'Authentication Failed, Please double check your login credentials and URL!')
            return HttpResponseRedirect("/")

        silo = None

        user = User.objects.get(username__exact=request.user)
        if request.POST.get("new_silo", None):
            silo = Silo(name=request.POST['new_silo'], owner=user, public=False, create_date=today)
            silo.save()
        else:
            silo = Silo.objects.get(id = request.POST["silo_id"])

        silo.reads.add(read_obj)
        silo_id = silo.id

        #create object from JSON String
        data = json.load(json_file)
        json_file.close()

        #loop over data and insert create and edit dates and append to dict
        for row in data:
            lvs = LabelValueStore()
            lvs.silo_id = silo_id
            for new_label, new_value in row.iteritems():
                if new_label is not "" and new_label is not None and new_label is not "edit_date" and new_label is not "create_date":
                    setattr(lvs, new_label, new_value)
            lvs.create_date = timezone.now()
            lvs.save()
        combineColumns(silo_id)
        messages.success(request, "Data imported successfully.")
        return HttpResponseRedirect('/silo_detail/' + str(silo_id) + '/')
    else:
        messages.error(request, "Invalid Request for importing JSON data")
        return HttpResponseRedirect("/")
Example #4
def import_from_google_spreadsheet(credential_json, silo, spreadsheet_key):
    sp_client = get_authorized_sp_client(credential_json)

    # Create a WorksheetQuery object to allow for filtering for worksheets by the title
    worksheet_query = gdata.spreadsheets.client.WorksheetQuery(
        title="Sheet1", title_exact=True)
    # Get a feed of all worksheets in the specified spreadsheet that matches the worksheet_query
    worksheets_feed = sp_client.get_worksheets(spreadsheet_key)

    # Retrieve the worksheet_key from the first match in the worksheets_feed object
    worksheet_key = worksheets_feed.entry[0].id.text.rsplit("/", 1)[1]

    ws = worksheets_feed.entry[0]
    #print '%s - rows %s - cols %s\n' % (ws.title.text, ws.row_count.text, ws.col_count.text)
    lvs = LabelValueStore()

    list_feed = sp_client.get_list_feed(spreadsheet_key, worksheet_key)

    for row in list_feed.entry:
        row_data = row.to_dict()
        skip_row = False
        for key, val in row_data.iteritems():
            #if the value of unique column is already in existing_silo_data then skip the row
            for unique_field in silo.unique_fields.all():
                filter_criteria = {'silo_id': silo.id, unique_field.name: val}
                if LabelValueStore.objects.filter(
                        **filter_criteria).count() > 0:
                    skip_row = True
                    continue
            if skip_row == True:
                break

            if key == "" or key is None or key == "silo_id": continue
            elif key == "id" or key == "_id": key = "user_assigned_id"
            elif key == "create_date": key = "created_date"
            elif key == "edit_date": key = "editted_date"
            setattr(lvs, key, val)
        if skip_row == True:
            continue
        lvs.silo_id = silo.id
        lvs.create_date = timezone.now()
        lvs.save()
        lvs = LabelValueStore()

    combineColumns(silo.id)

    return True
Example #5
    def handle(self, *args, **options):
        skip_row = False
        frequency = options['frequency']
        if frequency != "daily" and frequency != "weekly":
            return self.stdout.write("Frequency argument can either be 'daily' or 'weekly'")

        silos = Silo.objects.filter(unique_fields__isnull=False, reads__autopull=True, reads__autopull_frequency__isnull=False, reads__autopull_frequency = frequency).distinct()
        read_type = ReadType.objects.get(read_type="JSON")
        for silo in silos:
            reads = silo.reads.filter(type=read_type.pk)
            for read in reads:
                ona_token = ThirdPartyTokens.objects.get(user=silo.owner.pk, name="ONA")
                response = requests.get(read.read_url, headers={'Authorization': 'Token %s' % ona_token.token})
                data = json.loads(response.content)

                # import data into this silo
                num_rows = len(data)
                if num_rows == 0:
                    continue

                counter = None
                #loop over data and insert create and edit dates and append to dict
                for counter, row in enumerate(data):
                    skip_row = False
                    #if the value of unique column is already in existing_silo_data then skip the row
                    for unique_field in silo.unique_fields.all():
                        filter_criteria = {'silo_id': silo.pk, unique_field.name: row[unique_field.name]}
                        if LabelValueStore.objects.filter(**filter_criteria).count() > 0:
                            skip_row = True
                            continue
                    if skip_row == True:
                        continue
                    # at this point, the unique column value is not in existing data so append it.
                    lvs = LabelValueStore()
                    lvs.silo_id = silo.pk
                    for new_label, new_value in row.iteritems():
                        if new_label is not "" and new_label is not None and new_label is not "edit_date" and new_label is not "create_date":
                            setattr(lvs, new_label, new_value)
                    lvs.create_date = timezone.now()
                    result = lvs.save()

                if num_rows == (counter+1):
                    combineColumns(silo.pk)

                self.stdout.write('Successfully fetched the READ_ID, "%s", from ONA' % read.pk)
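
The command above reads options['frequency'], but the argument wiring is not part of this snippet. A hypothetical add_arguments() on the same command class that would supply it:

    def add_arguments(self, parser):
        # positional argument matching the 'daily' / 'weekly' check in handle()
        parser.add_argument('frequency', type=str,
                            help="Autopull frequency, either 'daily' or 'weekly'")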
Example #6
    def handle(self, *args, **options):
        silo = None
        read = None
        silo_id = options['silo_id']
        username = options['username']
        user = User.objects.get(username__exact=username)
        reads = Read.objects.filter(owner=user)

        try:
            silo = Silo.objects.get(pk=silo_id)
        except Silo.DoesNotExist:
            raise CommandError('Silo "%s" does not exist' % silo_id)

        for read_id in options['read_ids']:
            try:
                read = reads.get(pk=read_id)
            except Read.DoesNotExist:
                raise CommandError('Read "%s" does not exist for user, %s' % (read_id, user.username))

            # Fetch the data from ONA
            ona_token = ThirdPartyTokens.objects.get(user=user.pk, name="ONA")
            response = requests.get(read.read_url, headers={'Authorization': 'Token %s' % ona_token.token})
            data = json.loads(response.content)

            # import data into this silo
            num_rows = len(data)
            if num_rows == 0:
                continue

            counter = None
            #loop over data and insert create and edit dates and append to dict
            for counter, row in enumerate(data):
                lvs = LabelValueStore()
                lvs.silo_id = silo.pk
                for new_label, new_value in row.iteritems():
                    if new_label is not "" and new_label is not None and new_label is not "edit_date" and new_label is not "create_date":
                        setattr(lvs, new_label, new_value)
                lvs.create_date = timezone.now()
                result = lvs.save()

            if num_rows == (counter+1):
                combineColumns(silo_id)

            self.stdout.write('Successfully fetched the READ_ID, "%s", from database' % read_id)
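
As with the previous command, the option parsing is not shown. A hypothetical add_arguments() matching the lookups in handle() above:

    def add_arguments(self, parser):
        parser.add_argument('silo_id', type=int, help="Primary key of the target silo")
        parser.add_argument('username', type=str, help="Owner of the reads to import")
        parser.add_argument('read_ids', nargs='+', type=int,
                            help="One or more Read primary keys to fetch from ONA")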
Example #7
def import_from_gsheet_helper(user,
                              silo_id,
                              silo_name,
                              spreadsheet_id,
                              sheet_id=None):
    msgs = []
    read_url = get_spreadsheet_url(spreadsheet_id)

    if spreadsheet_id is None:
        msgs.append({
            "level": messages.ERROR,
            "msg": "A Google Spreadsheet is not selected to import data from.",
            "redirect": reverse('index')
        })

    credential_obj = get_credential_object(user)
    if not isinstance(credential_obj, OAuth2Credentials):
        msgs.append(credential_obj)
        return msgs

    defaults = {
        "name": silo_name,
        "description": "Google Sheet Import",
        "public": False,
        "owner": user
    }
    silo, created = Silo.objects.get_or_create(
        pk=None if silo_id == '0' else silo_id, defaults=defaults)
    #if not created and silo.unique_fields.exists() == False:
    #    msgs.append({"level": messages.ERROR,
    #                "msg": "A unique column must be specfied when importing to an existing table. <a href='%s'>Specify Unique Column</a>" % reverse_lazy('siloDetail', kwargs={"silo_id": silo.id}),
    #                "redirect": None})
    #    return msgs

    #if created:
    msgs.append({"silo_id": silo.id})

    service = get_authorized_service(credential_obj)

    # fetch the google spreadsheet metadata
    try:
        spreadsheet = service.spreadsheets().get(
            spreadsheetId=spreadsheet_id).execute()
    except HttpAccessTokenRefreshError as e:
        return [get_credential_object(user, True)]
    except Exception as e:
        error = json.loads(e.content).get("error")
        msg = "%s: %s" % (error.get("status"), error.get("message"))
        msgs.append({"level": messages.ERROR, "msg": msg})
        return msgs

    spreadsheet_name = spreadsheet.get("properties", {}).get("title", "")

    gsheet_read = get_or_create_read("GSheet Import", spreadsheet_name,
                                     "Google Spreadsheet Import",
                                     spreadsheet_id, user, silo)
    sheet_name = "Sheet1"
    if sheet_id:
        gsheet_read.gsheet_id = sheet_id
        gsheet_read.save()

    if gsheet_read.gsheet_id:
        sheets = spreadsheet.get("sheets", None)
        for sheet in sheets:
            properties = sheet.get("properties", None)
            if properties:
                if str(properties.get("sheetId")) == str(
                        gsheet_read.gsheet_id):
                    sheet_name = properties.get("title")

    headers = []
    data = None

    combine_cols = False
    # Fetch data from gsheet
    try:
        result = service.spreadsheets().values().get(
            spreadsheetId=spreadsheet_id, range=sheet_name).execute()
        data = result.get("values", [])
    except Exception as e:
        logger.error(e)
        msgs.append({
            "level": messages.ERROR,
            "msg": "Something went wrong 22: %s" % e,
            "redirect": None
        })
        return msgs

    unique_fields = silo.unique_fields.all()
    skipped_rows = set()
    for r, row in enumerate(data):
        if r == 0:
            headers = row
            continue
        filter_criteria = {}

        # build filter_criteria if unique field(s) have been setup for this silo
        for unique_field in unique_fields:
            try:
                filter_criteria.update(
                    {unique_field.name: row[headers.index(unique_field.name)]})
            except KeyError:
                pass
            except ValueError:
                pass
        if filter_criteria:
            filter_criteria.update({'silo_id': silo.id})
            # if a row is found, then fetch and update it
            # if no row is found then create a new one
            # if multiple rows are found then skip b/c not sure which one to update
            try:
                lvs = LabelValueStore.objects.get(**filter_criteria)
                lvs.edit_date = timezone.now()
            except LabelValueStore.DoesNotExist as e:
                lvs = LabelValueStore()
            except LabelValueStore.MultipleObjectsReturned as e:
                for k, v in filter_criteria.iteritems():
                    skipped_rows.add("%s=%s" % (k, v))
                continue
        else:
            lvs = LabelValueStore()

        for c, col in enumerate(row):
            try:
                key = headers[c]
            except IndexError as e:
                # this happens when a column header is missing in the gsheet
                continue
            if key == "" or key is None or key == "silo_id": continue
            elif key == "id" or key == "_id": key = "user_assigned_id"
            elif key == "edit_date": key = "editted_date"
            elif key == "create_date": key = "created_date"
            val = smart_str(row[c], strings_only=True)
            key = smart_str(key)
            setattr(lvs, key.replace(".", "_").replace("$", "USD"), val)
        lvs.silo_id = silo.id
        lvs.create_date = timezone.now()
        lvs.save()

    combineColumns(silo.pk)

    if skipped_rows:
        msgs.append({
            "level":
            messages.WARNING,
            "msg":
            "Skipped updating/adding records where %s because there are already multiple records."
            % ",".join(str(s) for s in skipped_rows)
        })

    msgs.append({"level": messages.SUCCESS, "msg": "Operation successful"})
    return msgs
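
import_from_gsheet_helper() renders nothing itself; it returns a list of dicts that either carry the new silo_id or a Django messages level, text, and optional redirect target. A hypothetical caller (view name and URL handling are illustrative only) showing how that list is assumed to be consumed:

def import_gsheet_view(request, silo_id):
    msgs = import_from_gsheet_helper(request.user, silo_id, "My Table",
                                     request.GET.get("spreadsheet_id"))
    redirect_url = reverse('index')
    for msg in msgs:
        if "silo_id" in msg:
            redirect_url = reverse_lazy('siloDetail', kwargs={"silo_id": msg["silo_id"]})
            continue
        messages.add_message(request, msg["level"], msg["msg"])
        if msg.get("redirect"):
            redirect_url = msg["redirect"]
    return HttpResponseRedirect(redirect_url)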
Example #8
def import_from_gsheet_helper(user, silo_id, silo_name, spreadsheet_id, sheet_id=None):
    msgs = []
    read_url = get_spreadsheet_url(spreadsheet_id)

    if spreadsheet_id is None:
        msgs.append({"level": messages.ERROR,
                    "msg": "A Google Spreadsheet is not selected to import data from.",
                    "redirect" : reverse('index') })

    credential_obj = get_credential_object(user)
    if not isinstance(credential_obj, OAuth2Credentials):
        msgs.append(credential_obj)
        return msgs

    defaults = {"name": silo_name, "description": "Google Sheet Import", "public": False, "owner": user}
    silo, created = Silo.objects.get_or_create(pk=None if silo_id=='0' else silo_id, defaults=defaults)
    if not created and silo.unique_fields.exists() == False:
        msgs.append({"level": messages.ERROR,
                    "msg": "A unique column must be specfied when importing to an existing table. <a href='%s'>Specify Unique Column</a>" % reverse_lazy('siloDetail', kwargs={"silo_id": silo.id}),
                    "redirect": None})
        return msgs

    if created:
        msgs.append({"silo_id": silo.id})

    service = get_authorized_service(credential_obj)

    # fetch the google spreadsheet metadata
    try:
        spreadsheet = service.spreadsheets().get(spreadsheetId=spreadsheet_id).execute()
    except HttpAccessTokenRefreshError as e:
        return [get_credential_object(user, True)]
    except Exception as e:
        error = json.loads(e.content).get("error")
        msg = "%s: %s" % (error.get("status"), error.get("message"))
        msgs.append({"level": messages.ERROR,
                    "msg": msg})
        return msgs

    spreadsheet_name = spreadsheet.get("properties", {}).get("title", "")

    gsheet_read = get_or_create_read("GSheet Import",
                                     spreadsheet_name,
                                     "Google Spreadsheet Import",
                                     spreadsheet_id,
                                     user,
                                     silo)
    sheet_name = "Sheet1"
    if sheet_id:
        gsheet_read.gsheet_id = sheet_id
        gsheet_read.save()

    if gsheet_read.gsheet_id:
        sheets = spreadsheet.get("sheets", None)
        for sheet in sheets:
            properties = sheet.get("properties", None)
            if properties:
                if str(properties.get("sheetId")) == str(gsheet_read.gsheet_id):
                    sheet_name = properties.get("title")

    headers = []
    data = None

    combine_cols = False
    # Fetch data from gsheet
    try:
        result = service.spreadsheets().values().get(spreadsheetId=spreadsheet_id, range=sheet_name).execute()
        data = result.get("values", [])
    except Exception as e:
        logger.error(e)
        msgs.append({"level": messages.ERROR,
                    "msg": "Something went wrong 22: %s" % e,
                    "redirect": None})
        return msgs

    unique_fields = silo.unique_fields.all()
    skipped_rows = set()
    for r, row in enumerate(data):
        if r == 0:
            headers = row
            continue
        filter_criteria = {}

        # build filter_criteria if unique field(s) have been setup for this silo
        for unique_field in unique_fields:
            try:
                filter_criteria.update({unique_field.name: row[headers.index(unique_field.name)]})
            except KeyError:
                pass
            except ValueError:
                pass
        if filter_criteria:
            filter_criteria.update({'silo_id': silo.id})
            # if a row is found, then fetch and update it
            # if no row is found then create a new one
            # if multiple rows are found then skip b/c not sure which one to update
            try:
                lvs = LabelValueStore.objects.get(**filter_criteria)
                lvs.edit_date = timezone.now()
            except LabelValueStore.DoesNotExist as e:
                lvs = LabelValueStore()
            except LabelValueStore.MultipleObjectsReturned as e:
                for k,v in filter_criteria.iteritems():
                    skipped_rows.add("%s=%s" % (k,v))
                continue
        else:
            lvs = LabelValueStore()

        for c, col in enumerate(row):
            try:
                key = headers[c]
            except IndexError as e:
                # this happens when a column header is missing in the gsheet
                continue
            if key == "" or key is None or key == "silo_id": continue
            elif key == "id" or key == "_id": key = "user_assigned_id"
            elif key == "edit_date": key = "editted_date"
            elif key == "create_date": key = "created_date"
            val = smart_str(row[c], strings_only=True)
            key = smart_str(key)
            setattr(lvs, key.replace(".", "_").replace("$", "USD"), val)
        lvs.silo_id = silo.id
        lvs.create_date = timezone.now()
        lvs.save()

    combineColumns(silo.pk)

    if skipped_rows:
        msgs.append({"level": messages.WARNING,
                    "msg": "Skipped updating/adding records where %s because there are already multiple records." % ",".join(str(s) for s in skipped_rows)})

    msgs.append({"level": messages.SUCCESS, "msg": "Operation successful"})
    return msgs
Example #9
File: views.py Project: as3adm/tola
def saveAndImportRead(request):
    """ 
    Saves ONA read if not already in the db and then imports its data 
    """
    if request.method != 'POST':
        return HttpResponseBadRequest("HTTP method, %s, is not supported" % request.method)

    read_type = ReadType.objects.get(read_type="JSON")
    name = request.POST.get('read_name', None)
    url = request.POST.get('read_url', None)
    owner = request.user
    description = request.POST.get('description', None)
    silo_id = None
    read = None
    silo = None
    provider = "ONA"
    try:
        silo_id = int(request.POST.get("silo_id", None))
    except Exception as e:
        print(e)
        return HttpResponse("Silo ID can only be an integer")

    try:
        read, created = Read.objects.get_or_create(read_name=name, owner=owner, 
            defaults={'read_url': url, 'type': read_type, 'description': description})
        if created: read.save()
    except Exception as e:
        print(e)
        return HttpResponse("Invalid name and/or URL")
    
    # Fetch the data from ONA
    ona_token = ThirdPartyTokens.objects.get(user=request.user, name=provider)
    response = requests.get(read.read_url, headers={'Authorization': 'Token %s' % ona_token.token})
    data = json.loads(response.content)

    existing_silo_cols = []
    new_cols = []
    show_mapping = False
    
    if silo_id <= 0:
        # create a new silo by the name of "name"
        silo = Silo(name=name, public=False, owner=owner)
        silo.save()
        silo.reads.add(read)
    else:
        # import into existing silo
        # Compare the columns of imported data with existing silo in case it needs merging
        silo = Silo.objects.get(pk=silo_id)
        lvs = json.loads(LabelValueStore.objects(silo_id=silo.id).to_json())
        for l in lvs:
            existing_silo_cols.extend(c for c in l.keys() if c not in existing_silo_cols)
        
        for row in data:
            new_cols.extend(c for c in row.keys() if c not in new_cols)
        
        for c in existing_silo_cols:
            if c == "silo_id" or c == "create_date": continue
            if c not in new_cols: show_mapping = True
            if show_mapping == True: 
                params = {'getSourceFrom':existing_silo_cols, 'getSourceTo':new_cols, 'from_silo_id':0, 'to_silo_id':silo.id}
                response = render_to_response("display/merge-column-form-inner.html", params, context_instance=RequestContext(request))
                response['show_mapping'] = '1'
                return response
    
    if silo:
        # import data into this silo
        num_rows = len(data)
        #loop over data and insert create and edit dates and append to dict
        for counter, row in enumerate(data):
            lvs = LabelValueStore()
            lvs.silo_id = silo.pk
            for new_label, new_value in row.iteritems():
                if new_label is not "" and new_label is not None and new_label is not "edit_date" and new_label is not "create_date":
                    setattr(lvs, new_label, new_value)
            lvs.create_date = timezone.now()
            result = lvs.save()
        if num_rows == (counter+1):
            combineColumns(silo_id)
            return HttpResponse("View silo data at <a href='/silo_detail/%s' target='_blank'>See your data</a>" % silo.pk)
    return HttpResponse(read.pk)
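
The request.POST fields saveAndImportRead() expects, shown as a hypothetical Django test-client call (URL, credentials, and values are illustrative only):

from django.test import Client

c = Client()
c.login(username="demo", password="demo")        # hypothetical credentials
response = c.post("/save_and_import_read/", {    # URL pattern is an assumption
    "read_name": "ONA submissions",
    "read_url": "https://api.ona.io/api/v1/data/12345",
    "description": "Nightly ONA pull",
    "silo_id": "0",                              # 0 or less creates a new silo
})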
Example #10
def updateMergeSilo(request, pk):
    silo = None
    mapping = None

    try:
        silo = Silo.objects.get(id=pk)
    except Silo.DoesNotExist as e:
        return HttpResponse("Table (%s) does not exist" % pk)

    try:
        mapping = MergedSilosFieldMapping.objects.get(merged_silo = silo.pk)
        left_table_id = mapping.from_silo.pk
        right_table_id = mapping.to_silo.pk
        data = mapping.mapping

        merged_data = mergeTwoSilos(data, left_table_id, right_table_id)
        try:
            merged_data['status']
            messages.error(request, merged_data.get('message', 'Failed to merge the tables'))
            return HttpResponseRedirect(reverse_lazy('siloDetail', kwargs={'id': pk},))
        except Exception as e:
            pass

        lvs = LabelValueStore.objects(silo_id=silo.id)
        num_rows_deleted = lvs.delete()

        # put the new silo data in mongo db.
        for counter, row in enumerate(merged_data):
            lvs = LabelValueStore()
            lvs.silo_id = silo.pk
            for l, v in row.iteritems():
                if l == 'silo_id' or l == '_id' or l == 'create_date' or l == 'edit_date':
                    continue
                else:
                    setattr(lvs, l, v)
            lvs.create_date = timezone.now()
            result = lvs.save()

    except MergedSilosFieldMapping.DoesNotExist as e:
        # Check if the silo has a source from ONA: and if so, then update its data
        stop = False



        if silo.unique_fields.all().exists() == False:
            stop = True
            messages.info(request, "In order to update a table, it must have a unique field set.")


        read_type = ReadType.objects.get(read_type="JSON")
        reads = silo.reads.filter(type=read_type.pk)
        for read in reads:
            ona_token = ThirdPartyTokens.objects.get(user=silo.owner.pk, name="ONA")
            response = requests.get(read.read_url, headers={'Authorization': 'Token %s' % ona_token.token})
            data = json.loads(response.content)

            # import data into this silo
            num_rows = len(data)
            if num_rows == 0:
                continue

            counter = None
            #loop over data and insert create and edit dates and append to dict
            for counter, row in enumerate(data):
                skip_row = False
                #if the value of unique column is already in existing_silo_data then skip the row
                for unique_field in silo.unique_fields.all():
                    filter_criteria = {'silo_id': silo.pk, unique_field.name: row[unique_field.name]}
                    if LabelValueStore.objects.filter(**filter_criteria).count() > 0:
                        skip_row = True
                        continue
                if skip_row == True:
                    continue
                # at this point, the unique column value is not in existing data so append it.
                lvs = LabelValueStore()
                lvs.silo_id = silo.pk
                for new_label, new_value in row.iteritems():
                    if new_label is not "" and new_label is not None and new_label is not "edit_date" and new_label is not "create_date":
                        setattr(lvs, new_label, new_value)
                lvs.create_date = timezone.now()
                result = lvs.save()

            if num_rows == (counter+1):
                combineColumns(silo.pk)
        # reset num_rows
        num_rows = 0
        read_types = ReadType.objects.filter(Q(read_type="GSheet Import") | Q(read_type="Google Spreadsheet"))
        reads = silo.reads.filter(reduce(or_, [Q(type=read.id) for read in read_types]))
        for read in reads:
            # get gsheet authorized client and the gsheet id to fetch its data into the silo
            storage = Storage(GoogleCredentialsModel, 'id', silo.owner, 'credential')
            credential = storage.get()
            credential_json = json.loads(credential.to_json())
            #self.stdout.write("%s" % credential_json)
            if credential is None or credential.invalid == True:
                messages.error(request, "There was a Google credential problem with user: %s for gsheet %s" % (request.user, read.pk))
                continue

            suc = import_from_google_spreadsheet(credential_json, silo, read.resource_id)
            if suc == False:
                messages.error(request, "Failed to import data from gsheet %s " % read.pk)

        if not reads:
            stop = True
            messages.info(request, "Tables that only have a CSV source cannot be updated.")
    return HttpResponseRedirect(reverse_lazy('siloDetail', kwargs={'id': pk},))
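
updateMergeSilo() touches only four attributes of MergedSilosFieldMapping. A hypothetical shape of that model, inferred from those lookups alone (the real definition may differ):

from django.db import models

class MergedSilosFieldMapping(models.Model):
    # the two source tables and the table their data was merged into
    from_silo = models.ForeignKey(Silo, related_name='merge_sources_from')
    to_silo = models.ForeignKey(Silo, related_name='merge_sources_to')
    merged_silo = models.ForeignKey(Silo, related_name='merge_results')
    # JSON payload describing the column mapping, consumed by mergeTwoSilos()
    mapping = models.TextField()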
Example #11
def mergeTwoSilos(mapping_data, lsid, rsid, msid):
    """
    @params
    mapping_data: data that describes how mapping is done between two silos
    lsid: Left Silo ID
    rsid: Right Silo ID
    msid: Merge Silo ID
    """
    mappings = json.loads(mapping_data)

    l_unmapped_cols = mappings.pop('left_unmapped_cols')
    r_unmapped_cols = mappings.pop('right_unmapped_cols')

    merged_cols = []

    #print("lsid:% rsid:%s msid:%s" % (lsid, rsid, msid))
    l_silo_data = LabelValueStore.objects(silo_id=lsid)

    r_silo_data = LabelValueStore.objects(silo_id=rsid)

    # Loop through the mapped cols and add them to the list of merged_cols
    for k, v in mappings.iteritems():
        col_name = v['right_table_col']
        if col_name == "silo_id" or col_name == "create_date": continue
        if col_name not in merged_cols:
            merged_cols.append(col_name)

    for lef_col in l_unmapped_cols:
        if lef_col not in merged_cols: merged_cols.append(lef_col)

    for right_col in r_unmapped_cols:
        if right_col not in merged_cols: merged_cols.append(right_col)

    # retrieve the left silo
    try:
        lsilo = Silo.objects.get(pk=lsid)
    except Silo.DoesNotExist as e:
        msg = "Left Silo does not exist: silo_id=%s" % lsid
        logger.error(msg)
        return {'status': "danger",  'message': msg}

    # retrieve the right silo
    try:
        rsilo = Silo.objects.get(pk=rsid)
    except Silo.DoesNotExist as e:
        msg = "Right Table does not exist: table_id=%s" % rsid
        logger.error(msg)
        return {'status': "danger",  'message': msg}

    # retrieve the merged silo
    try:
        msilo = Silo.objects.get(pk=msid)
    except Silo.DoesNotExist as e:
        msg = "Merged Table does not exist: table_id=%s" % msid
        logger.error(msg)
        return {'status': "danger",  'message': msg}

    # retrieve the unique fields set for the right silo
    r_unique_fields = rsilo.unique_fields.all()

    if not r_unique_fields:
        msg = "The table, [%s], must have a unique column and it should be the same as the one specified in [%s] table." % (rsilo.name, lsilo.name)
        logger.error(msg)
        return {'status': "danger",  'message': msg}

    # retrieve the unique fields of the merged_silo
    m_unique_fields = msilo.unique_fields.all()

    # make sure that the unique_fields from right table are in the merged_table
    # by adding them to the merged_cols array.
    for uf in r_unique_fields:
        if uf.name not in merged_cols: merged_cols.append(uf.name)

        #make sure to set the same unique_fields in the merged_table
        if not m_unique_fields.filter(name=uf.name).exists():
            unique_field, created = UniqueFields.objects.get_or_create(name=uf.name, silo=msilo, defaults={"name": uf.name, "silo": msilo})

    # Get the correct set of data from the right table
    for row in r_silo_data:
        merged_row = OrderedDict()
        for k in row:
            # Skip over those columns in the right table that shouldn't be in the merged_table
            if k not in merged_cols: continue
            merged_row[k] = row[k]

        # now set its silo_id to the merged_table id
        merged_row["silo_id"] = msid
        merged_row["create_date"] = timezone.now()

        filter_criteria = {}
        for uf in r_unique_fields:
            try:
                filter_criteria.update({str(uf.name): str(merged_row[uf.name])})
            except KeyError as e:
                # when this exception occurs, it means that the col identified
                # as the unique_col is not present in all rows of the right_table
                logger.warning("The field, %s, is not present in table id=%s" % (uf.name, rsid))

        # adding the merged_table_id because the filter criteria should search the merged_table
        filter_criteria.update({'silo_id': msid})

        # this is an upsert operation; note the upsert=True
        db.label_value_store.update_one(filter_criteria, {"$set": merged_row}, upsert=True)


    # Retrieve the unique_fields set by left table
    l_unique_fields = lsilo.unique_fields.all()
    if not l_unique_fields:
        msg = "The table, [%s], must have a unique column and it should be the same as the one specified in [%s] table." % (lsilo.name, rsilo.name)
        logger.error(msg)
        return {'status': "danger",  'message': msg}

    for uf in l_unique_fields:
        # if there are unique fields that are not in the right table then show error
        if not r_unique_fields.filter(name=uf.name).exists():
            msg = "Both tables (%s, %s) must have the same column set as unique fields" % (lsilo.name, rsilo.name)
            logger.error(msg)
            return {"status": "danger", "message": msg}

    # now loop through left table and apply the mapping
    for row in l_silo_data:
        merged_row = OrderedDict()
        # Loop through the column mappings for each row in left_table.
        for k, v in mappings.iteritems():
            merge_type = v['merge_type']
            left_cols = v['left_table_cols']
            right_col = v['right_table_col']

            # if merge_type is specified then there must be multiple columns in the left_cols array
            if merge_type:
                mapped_value = ''
                for col in left_cols:
                    if merge_type == 'Sum' or merge_type == 'Avg':
                        try:
                            if mapped_value == '':
                                mapped_value = float(row[col])
                            else:
                                mapped_value = float(mapped_value) + float(row[col])
                        except Exception as e:
                            msg = 'Failed to apply %s to column, %s : %s ' % (merge_type, col, e.message)
                            logger.error(msg)
                            return {'status': "danger",  'message': msg}
                    else:
                        mapped_value += ' ' + smart_str(row[col])

                # Now calculate avg if the merge_type was actually "Avg"
                if merge_type == 'Avg':
                    mapped_value = mapped_value / len(left_cols)
            # only one col in left table is mapped to one col in the right table.
            else:
                col = str(left_cols[0])
                if col == "silo_id": continue
                try:
                    mapped_value = row[col]
                except KeyError as e:
                    # When updating data in merged_table at a later time, it is possible
                    # the original source tables may have had some columns removed, in which
                    # case we might get a KeyError, so we just skip it.
                    continue

            # right_col is used as an index of merged_row because one or more left cols map to one col in the right table
            merged_row[right_col] = mapped_value

        # Get data from left unmapped columns:
        for col in l_unmapped_cols:
            if col in row:
                merged_row[col] = row[col]

        filter_criteria = {}
        for uf in l_unique_fields:
            try:
                filter_criteria.update({str(uf.name): str(merged_row[uf.name])})
            except KeyError:
                # when this exception occurs, it means that the col identified
                # as the unique_col is not present in all rows of the left_table
                msg ="The field, %s, is not present in table id=%s" % (uf.name, lsid)
                logger.warning(msg)

        filter_criteria.update({'silo_id': msid})

        # override the silo_id and create_date columns values to make sure they're not set
        # to the values that are in left table or right table
        merged_row["silo_id"] = msid
        merged_row["create_date"] = timezone.now()

        # Now update or insert a row if there is no matching record available
        res = db.label_value_store.update_one(filter_criteria, {"$set": merged_row}, upsert=True)

    # Make sure all rows have the same cols in the merged_silo
    combineColumns(msid)
    return {'status': "success",  'message': "Merged data successfully"}
Example #12
def appendTwoSilos(mapping_data, lsid, rsid, msid):
    """
    @params
    mapping_data: data that describes how mapping is done between two silos
    lsid: Left Silo ID
    rsid: Right Silo ID
    msid: Merge Silo ID
    """
    mappings = json.loads(mapping_data)

    l_unmapped_cols = mappings.pop('left_unmapped_cols')
    r_unmapped_cols = mappings.pop('right_unmapped_cols')

    merged_cols = []

    #print("lsid:% rsid:%s msid:%s" % (lsid, rsid, msid))
    l_silo_data = LabelValueStore.objects(silo_id=lsid)

    r_silo_data = LabelValueStore.objects(silo_id=rsid)

    # Loop through the mapped cols and add them to the list of merged_cols
    for k, v in mappings.iteritems():
        col_name = v['right_table_col']
        if col_name == "silo_id" or col_name == "create_date": continue
        if col_name not in merged_cols:
            merged_cols.append(col_name)

    for lef_col in l_unmapped_cols:
        if lef_col not in merged_cols: merged_cols.append(lef_col)

    for right_col in r_unmapped_cols:
        if right_col not in merged_cols: merged_cols.append(right_col)

    # retrieve the left silo
    try:
        lsilo = Silo.objects.get(pk=lsid)
    except Silo.DoesNotExist as e:
        msg = "Table id=%s does not exist." % lsid
        logger.error(msg)
        return {'status': "danger",  'message': msg}

    # retrieve the right silo
    try:
        rsilo = Silo.objects.get(pk=rsid)
    except Silo.DoesNotExist as e:
        msg = "Right Table does not exist: table_id=%s" % rsid
        logger.error(msg)
        return {'status': "danger",  'message': msg}

    # retrieve the merged silo
    try:
        msilo = Silo.objects.get(pk=msid)
    except Silo.DoesNotExist as e:
        msg = "Merged Table does not exist: table_id=%s" % msid
        logger.error(msg)
        return {'status': "danger",  'message': msg}


    # Delete Any existing data from the merged_table
    deleted_res = db.label_value_store.delete_many({"silo_id": msid})

    # Get the correct set of data from the right table
    for row in r_silo_data:
        merged_row = OrderedDict()
        for k in row:
            # Skip over those columns in the right table that shouldn't be in the merged_table
            if k not in merged_cols: continue
            merged_row[k] = row[k]

        # now set its silo_id to the merged_table id
        merged_row["silo_id"] = msid
        merged_row["create_date"] = timezone.now()
        db.label_value_store.insert_one(merged_row)


    # now loop through left table and apply the mapping
    for row in l_silo_data:
        merged_row = OrderedDict()
        # Loop through the column mappings for each row in left_table.
        for k, v in mappings.iteritems():
            merge_type = v['merge_type']
            left_cols = v['left_table_cols']
            right_col = v['right_table_col']

            # if merge_type is specified then there must be multiple columns in the left_cols array
            if merge_type:
                mapped_value = ''
                for col in left_cols:
                    if merge_type == 'Sum' or merge_type == 'Avg':
                        try:
                            if mapped_value == '':
                                mapped_value = float(row[col])
                            else:
                                mapped_value = float(mapped_value) + float(row[col])
                        except Exception as e:
                            msg = 'Failed to apply %s to column, %s : %s ' % (merge_type, col, e.message)
                            logger.error(msg)
                            return {'status': "danger",  'message': msg}
                    else:
                        mapped_value += ' ' + smart_str(row[col])

                # Now calculate avg if the merge_type was actually "Avg"
                if merge_type == 'Avg':
                    mapped_value = mapped_value / len(left_cols)
            # only one col in left table is mapped to one col in the right table.
            else:
                col = str(left_cols[0])
                if col == "silo_id": continue
                try:
                    mapped_value = row[col]
                except KeyError as e:
                    # When updating data in merged_table at a later time, it is possible
                    # the original source tables may have had some columns removed, in which
                    # case we might get a KeyError, so we just skip it.
                    continue

            # right_col is used as an index of merged_row because one or more left cols map to one col in the right table
            merged_row[right_col] = mapped_value

        # Get data from left unmapped columns:
        for col in l_unmapped_cols:
            if col in row:
                merged_row[col] = row[col]

        merged_row["silo_id"] = msid
        merged_row["create_date"] = timezone.now()

        db.label_value_store.insert_one(merged_row)
    combineColumns(msid)
    return {'status': "success",  'message': "Appended data successfully"}