def doMerge(request):
    from_silo_id = request.POST["from_silo_id"]
    to_silo_id = request.POST["to_silo_id"]
    try:
        from_silo_id = int(from_silo_id)
        to_silo_id = int(to_silo_id)
    except ValueError:
        from_silo_id = None
        to_silo_id = None
        print("The from_silo_id and/or the to_silo_id is not an integer")

    client = MongoClient(uri)
    db = client.tola

    if from_silo_id is not None and to_silo_id is not None:
        for k in request.POST:
            if k not in ("silo_id", "_id", "to_silo_id", "from_silo_id", "csrfmiddlewaretoken"):
                from_field = request.POST.getlist(k)[0].lower()
                to_field = request.POST.getlist(k)[1].lower()
                if to_field == "ignore":
                    # This field should be deleted from the rows where silo_id == from_silo_id.
                    # Note: compare against "ignore" because to_field has already been lower-cased.
                    db.label_value_store.update_many(
                        {"silo_id": from_silo_id},
                        {"$unset": {from_field: ""}},
                        False
                    )
                elif to_field == "0":
                    # Nothing to do here: once silo_id is updated to to_silo_id,
                    # this field simply becomes part of the destination silo.
                    pass
                else:
                    if from_field != to_field:
                        db.label_value_store.update_many(
                            {"silo_id": from_silo_id},
                            {"$rename": {from_field: to_field},
                             "$currentDate": {"edit_date": True}},
                            False
                        )

        db.label_value_store.update_many(
            {"silo_id": from_silo_id},
            {"$set": {"silo_id": to_silo_id}},
            False
        )
        Silo.objects.filter(pk=from_silo_id).delete()
        combineColumns(to_silo_id)
        #messages.success(request, "Silos merged successfully")

    return HttpResponseRedirect("/silo_detail/%s" % to_silo_id)
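# Illustrative only (not part of the original module): a hedged sketch of the POST
# payload doMerge() expects, reconstructed from how the view above reads request.POST.
# The column names are hypothetical and "csrfmiddlewaretoken" is omitted. For every
# non-reserved key, getlist(key)[0] is the column in the "from" silo and
# getlist(key)[1] is the destination column in the "to" silo; a destination of
# "Ignore" drops the column, and "0" carries it over unchanged.
EXAMPLE_MERGE_PAYLOAD = {
    "from_silo_id": "12",
    "to_silo_id": "34",
    "fname": ["FName", "first_name"],  # rename FName -> first_name on merge
    "lname": ["LName", "Ignore"],      # drop this column from the "from" silo
    "notes": ["Notes", "0"],           # keep as-is; becomes part of the "to" silo
}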
def uploadFile(request, id):
    """
    Upload CSV file and save its data
    """
    if request.method == 'POST':
        form = UploadForm(request.POST)
        if form.is_valid():
            read_obj = Read.objects.get(pk=id)
            today = str(datetime.date.today().strftime('%Y-%m-%d'))
            silo = None
            user = User.objects.get(username__exact=request.user)
            if request.POST.get("new_silo", None):
                silo = Silo(name=request.POST['new_silo'], owner=user, public=False, create_date=today)
                silo.save()
            else:
                silo = Silo.objects.get(id=request.POST["silo_id"])
            silo.reads.add(read_obj)
            silo_id = silo.id

            # create objects from the CSV data
            data = csv.reader(read_obj.file_data)
            labels = None
            try:
                labels = data.next()  # the first row of the CSV should be the column headers
            except IOError:
                messages.error(request, "The CSV file could not be found")
                return HttpResponseRedirect(reverse_lazy('showRead', kwargs={'id': read_obj.id},))

            for row in data:
                lvs = LabelValueStore()
                lvs.silo_id = silo_id
                for col_counter, val in enumerate(row):
                    key = str(labels[col_counter]).replace(".", "_").replace("$", "USD")
                    if key != "" and key is not None and key not in ("silo_id", "id", "_id"):
                        if key == "create_date":
                            key = "created_date"
                        if key == "edit_date":
                            key = "editted_date"
                        setattr(lvs, key, val)
                lvs.create_date = timezone.now()
                lvs.save()

            combineColumns(silo_id)
            return HttpResponseRedirect('/silo_detail/' + str(silo_id) + '/')
        else:
            messages.error(request, "There was a problem with reading the contents of your file: %s" % form.errors)

    user = User.objects.get(username__exact=request.user)

    # get all of the silo info to pass to the form
    get_silo = Silo.objects.filter(owner=user)

    # display the upload form
    return render(request, 'read/file.html', {
        'read_id': id,
        'get_silo': get_silo,
    })
def getJSON(request):
    """
    Get JSON feed info from form then grab data
    """
    if request.method == 'POST':
        # retrieve submitted Feed info from database
        read_obj = Read.objects.get(id=request.POST.get("read_id", None))

        # set date time stamp
        today = str(datetime.date.today().strftime('%Y-%m-%d'))

        try:
            request2 = urllib2.Request(read_obj.read_url)
            # if a username was passed in, get auth info from the form post,
            # then encode it and add it to the request header
            if request.POST['user_name']:
                username = request.POST['user_name']
                password = request.POST['password']
                base64string = base64.encodestring('%s:%s' % (username, password))[:-1]
                request2.add_header("Authorization", "Basic %s" % base64string)
            # retrieve JSON data from formhub via auth info
            json_file = urllib2.urlopen(request2)
        except Exception as e:
            #print e
            messages.error(request, 'Authentication Failed, Please double check your login credentials and URL!')

        silo = None
        user = User.objects.get(username__exact=request.user)
        if request.POST.get("new_silo", None):
            silo = Silo(name=request.POST['new_silo'], owner=user, public=False, create_date=today)
            silo.save()
        else:
            silo = Silo.objects.get(id=request.POST["silo_id"])
        silo.reads.add(read_obj)
        silo_id = silo.id

        # create objects from the JSON string
        data = json.load(json_file)
        json_file.close()

        # loop over the data, set the create date, and save each row
        for row in data:
            lvs = LabelValueStore()
            lvs.silo_id = silo_id
            for new_label, new_value in row.iteritems():
                # use equality (not identity) checks when comparing strings
                if new_label != "" and new_label is not None and new_label not in ("edit_date", "create_date"):
                    setattr(lvs, new_label, new_value)
            lvs.create_date = timezone.now()
            lvs.save()
        combineColumns(silo_id)

        messages.success(request, "Data imported successfully.")
        return HttpResponseRedirect('/silo_detail/' + str(silo_id) + '/')
    else:
        messages.error(request, "Invalid Request for importing JSON data")
        return HttpResponseRedirect("/")
def import_from_google_spreadsheet(credential_json, silo, spreadsheet_key):
    sp_client = get_authorized_sp_client(credential_json)

    # Create a WorksheetQuery object to allow for filtering worksheets by title
    worksheet_query = gdata.spreadsheets.client.WorksheetQuery(title="Sheet1", title_exact=True)

    # Get a feed of all worksheets in the specified spreadsheet that match the worksheet_query
    worksheets_feed = sp_client.get_worksheets(spreadsheet_key)

    # Retrieve the worksheet_key from the first match in the worksheets_feed object
    worksheet_key = worksheets_feed.entry[0].id.text.rsplit("/", 1)[1]
    ws = worksheets_feed.entry[0]
    #print '%s - rows %s - cols %s\n' % (ws.title.text, ws.row_count.text, ws.col_count.text)

    lvs = LabelValueStore()
    list_feed = sp_client.get_list_feed(spreadsheet_key, worksheet_key)
    for row in list_feed.entry:
        row_data = row.to_dict()
        skip_row = False
        for key, val in row_data.iteritems():
            # if the value of the unique column is already in the existing silo data, skip the row
            for unique_field in silo.unique_fields.all():
                filter_criteria = {'silo_id': silo.id, unique_field.name: val}
                if LabelValueStore.objects.filter(**filter_criteria).count() > 0:
                    skip_row = True
                    continue
            if skip_row:
                break

            if key == "" or key is None or key == "silo_id":
                continue
            elif key == "id" or key == "_id":
                key = "user_assigned_id"
            elif key == "create_date":
                key = "created_date"
            elif key == "edit_date":
                key = "editted_date"
            setattr(lvs, key, val)

        if skip_row:
            continue

        lvs.silo_id = silo.id
        lvs.create_date = timezone.now()
        lvs.save()
        lvs = LabelValueStore()

    combineColumns(silo.id)
    return True
def handle(self, *args, **options):
    skip_row = False
    frequency = options['frequency']
    if frequency != "daily" and frequency != "weekly":
        return self.stdout.write("Frequency argument can either be 'daily' or 'weekly'")

    silos = Silo.objects.filter(unique_fields__isnull=False,
                                reads__autopull=True,
                                reads__autopull_frequency__isnull=False,
                                reads__autopull_frequency=frequency).distinct()
    read_type = ReadType.objects.get(read_type="JSON")
    for silo in silos:
        reads = silo.reads.filter(type=read_type.pk)
        for read in reads:
            ona_token = ThirdPartyTokens.objects.get(user=silo.owner.pk, name="ONA")
            response = requests.get(read.read_url, headers={'Authorization': 'Token %s' % ona_token.token})
            data = json.loads(response.content)

            # import data into this silo
            num_rows = len(data)
            if num_rows == 0:
                continue

            counter = None
            # loop over the data, set the create date, and save each row
            for counter, row in enumerate(data):
                skip_row = False
                # if the value of the unique column is already in the existing silo data, skip the row
                for unique_field in silo.unique_fields.all():
                    filter_criteria = {'silo_id': silo.pk, unique_field.name: row[unique_field.name]}
                    if LabelValueStore.objects.filter(**filter_criteria).count() > 0:
                        skip_row = True
                        continue

                if skip_row:
                    continue

                # at this point, the unique column value is not in the existing data, so append it
                lvs = LabelValueStore()
                lvs.silo_id = silo.pk
                for new_label, new_value in row.iteritems():
                    if new_label != "" and new_label is not None and new_label not in ("edit_date", "create_date"):
                        setattr(lvs, new_label, new_value)
                lvs.create_date = timezone.now()
                result = lvs.save()

            if num_rows == (counter + 1):
                combineColumns(silo.pk)

            self.stdout.write('Successfully fetched the READ_ID, "%s", from ONA' % read.pk)
def handle(self, *args, **options):
    silo = None
    read = None
    silo_id = options['silo_id']
    username = options['username']
    user = User.objects.get(username__exact=username)
    reads = Read.objects.filter(owner=user)
    try:
        silo = Silo.objects.get(pk=silo_id)
    except Silo.DoesNotExist:
        raise CommandError('Silo "%s" does not exist' % silo_id)

    for read_id in options['read_ids']:
        try:
            # use .get() so a missing read raises Read.DoesNotExist
            # (indexing an empty queryset would raise IndexError instead)
            read = reads.get(pk=read_id)
        except Read.DoesNotExist:
            raise CommandError('Read "%s" does not exist for user, %s' % (read_id, user.username))

        # Fetch the data from ONA
        ona_token = ThirdPartyTokens.objects.get(user=user.pk, name="ONA")
        response = requests.get(read.read_url, headers={'Authorization': 'Token %s' % ona_token.token})
        data = json.loads(response.content)

        # import data into this silo
        num_rows = len(data)
        if num_rows == 0:
            continue

        counter = None
        # loop over the data, set the create date, and save each row
        for counter, row in enumerate(data):
            lvs = LabelValueStore()
            lvs.silo_id = silo.pk
            for new_label, new_value in row.iteritems():
                if new_label != "" and new_label is not None and new_label not in ("edit_date", "create_date"):
                    setattr(lvs, new_label, new_value)
            lvs.create_date = timezone.now()
            result = lvs.save()

        if num_rows == (counter + 1):
            combineColumns(silo_id)

        self.stdout.write('Successfully fetched the READ_ID, "%s", from database' % read_id)
def import_from_gsheet_helper(user, silo_id, silo_name, spreadsheet_id, sheet_id=None):
    msgs = []
    read_url = get_spreadsheet_url(spreadsheet_id)

    if spreadsheet_id is None:
        msgs.append({
            "level": messages.ERROR,
            "msg": "A Google Spreadsheet is not selected to import data from.",
            "redirect": reverse('index')
        })

    credential_obj = get_credential_object(user)
    if not isinstance(credential_obj, OAuth2Credentials):
        msgs.append(credential_obj)
        return msgs

    defaults = {
        "name": silo_name,
        "description": "Google Sheet Import",
        "public": False,
        "owner": user
    }
    silo, created = Silo.objects.get_or_create(
        pk=None if silo_id == '0' else silo_id, defaults=defaults)

    #if not created and not silo.unique_fields.exists():
    #    msgs.append({"level": messages.ERROR,
    #                 "msg": "A unique column must be specified when importing to an existing table. <a href='%s'>Specify Unique Column</a>" % reverse_lazy('siloDetail', kwargs={"silo_id": silo.id}),
    #                 "redirect": None})
    #    return msgs

    #if created:
    msgs.append({"silo_id": silo.id})

    service = get_authorized_service(credential_obj)

    # fetch the google spreadsheet metadata
    try:
        spreadsheet = service.spreadsheets().get(
            spreadsheetId=spreadsheet_id).execute()
    except HttpAccessTokenRefreshError:
        return [get_credential_object(user, True)]
    except Exception as e:
        error = json.loads(e.content).get("error")
        msg = "%s: %s" % (error.get("status"), error.get("message"))
        msgs.append({"level": messages.ERROR, "msg": msg})
        return msgs

    spreadsheet_name = spreadsheet.get("properties", {}).get("title", "")

    gsheet_read = get_or_create_read("GSheet Import",
                                     spreadsheet_name,
                                     "Google Spreadsheet Import",
                                     spreadsheet_id,
                                     user,
                                     silo)
    sheet_name = "Sheet1"
    if sheet_id:
        gsheet_read.gsheet_id = sheet_id
        gsheet_read.save()

    if gsheet_read.gsheet_id:
        sheets = spreadsheet.get("sheets", None)
        for sheet in sheets:
            properties = sheet.get("properties", None)
            if properties:
                if str(properties.get("sheetId")) == str(gsheet_read.gsheet_id):
                    sheet_name = properties.get("title")

    headers = []
    data = None
    combine_cols = False

    # Fetch data from the gsheet
    try:
        result = service.spreadsheets().values().get(
            spreadsheetId=spreadsheet_id, range=sheet_name).execute()
        data = result.get("values", [])
    except Exception as e:
        logger.error(e)
        msgs.append({
            "level": messages.ERROR,
            "msg": "Something went wrong 22: %s" % e,
            "redirect": None
        })
        return msgs

    unique_fields = silo.unique_fields.all()
    skipped_rows = set()
    for r, row in enumerate(data):
        if r == 0:
            headers = row
            continue

        filter_criteria = {}

        # build filter_criteria if unique field(s) have been set up for this silo
        for unique_field in unique_fields:
            try:
                filter_criteria.update(
                    {unique_field.name: row[headers.index(unique_field.name)]})
            except KeyError:
                pass
            except ValueError:
                pass

        if filter_criteria:
            filter_criteria.update({'silo_id': silo.id})

            # if a row is found, then fetch and update it;
            # if no row is found, then create a new one;
            # if multiple rows are found, skip because it is not clear which one to update
            try:
                lvs = LabelValueStore.objects.get(**filter_criteria)
                lvs.edit_date = timezone.now()
            except LabelValueStore.DoesNotExist:
                lvs = LabelValueStore()
            except LabelValueStore.MultipleObjectsReturned:
                for k, v in filter_criteria.iteritems():
                    skipped_rows.add("%s=%s" % (k, v))
                continue
        else:
            lvs = LabelValueStore()

        for c, col in enumerate(row):
            try:
                key = headers[c]
            except IndexError:
                # this happens when a column header is missing in the gsheet
                continue
            if key == "" or key is None or key == "silo_id":
                continue
            elif key == "id" or key == "_id":
                key = "user_assigned_id"
            elif key == "edit_date":
                key = "editted_date"
            elif key == "create_date":
                key = "created_date"
            val = smart_str(row[c], strings_only=True)
            key = smart_str(key)
            setattr(lvs, key.replace(".", "_").replace("$", "USD"), val)

        lvs.silo_id = silo.id
        lvs.create_date = timezone.now()
        lvs.save()

    combineColumns(silo.pk)

    if skipped_rows:
        msgs.append({
            "level": messages.WARNING,
            "msg": "Skipped updating/adding records where %s because there are already multiple records." % ",".join(str(s) for s in skipped_rows)
        })

    msgs.append({"level": messages.SUCCESS, "msg": "Operation successful"})
    return msgs
def import_from_gsheet_helper(user, silo_id, silo_name, spreadsheet_id, sheet_id=None):
    msgs = []
    read_url = get_spreadsheet_url(spreadsheet_id)

    if spreadsheet_id is None:
        msgs.append({
            "level": messages.ERROR,
            "msg": "A Google Spreadsheet is not selected to import data from.",
            "redirect": reverse('index')
        })

    credential_obj = get_credential_object(user)
    if not isinstance(credential_obj, OAuth2Credentials):
        msgs.append(credential_obj)
        return msgs

    defaults = {
        "name": silo_name,
        "description": "Google Sheet Import",
        "public": False,
        "owner": user
    }
    silo, created = Silo.objects.get_or_create(
        pk=None if silo_id == '0' else silo_id, defaults=defaults)

    if not created and not silo.unique_fields.exists():
        msgs.append({"level": messages.ERROR,
                     "msg": "A unique column must be specified when importing to an existing table. <a href='%s'>Specify Unique Column</a>" % reverse_lazy('siloDetail', kwargs={"silo_id": silo.id}),
                     "redirect": None})
        return msgs

    if created:
        msgs.append({"silo_id": silo.id})

    service = get_authorized_service(credential_obj)

    # fetch the google spreadsheet metadata
    try:
        spreadsheet = service.spreadsheets().get(
            spreadsheetId=spreadsheet_id).execute()
    except HttpAccessTokenRefreshError:
        return [get_credential_object(user, True)]
    except Exception as e:
        error = json.loads(e.content).get("error")
        msg = "%s: %s" % (error.get("status"), error.get("message"))
        msgs.append({"level": messages.ERROR, "msg": msg})
        return msgs

    spreadsheet_name = spreadsheet.get("properties", {}).get("title", "")

    gsheet_read = get_or_create_read("GSheet Import",
                                     spreadsheet_name,
                                     "Google Spreadsheet Import",
                                     spreadsheet_id,
                                     user,
                                     silo)
    sheet_name = "Sheet1"
    if sheet_id:
        gsheet_read.gsheet_id = sheet_id
        gsheet_read.save()

    if gsheet_read.gsheet_id:
        sheets = spreadsheet.get("sheets", None)
        for sheet in sheets:
            properties = sheet.get("properties", None)
            if properties:
                if str(properties.get("sheetId")) == str(gsheet_read.gsheet_id):
                    sheet_name = properties.get("title")

    headers = []
    data = None
    combine_cols = False

    # Fetch data from the gsheet
    try:
        result = service.spreadsheets().values().get(
            spreadsheetId=spreadsheet_id, range=sheet_name).execute()
        data = result.get("values", [])
    except Exception as e:
        logger.error(e)
        msgs.append({
            "level": messages.ERROR,
            "msg": "Something went wrong 22: %s" % e,
            "redirect": None
        })
        return msgs

    unique_fields = silo.unique_fields.all()
    skipped_rows = set()
    for r, row in enumerate(data):
        if r == 0:
            headers = row
            continue

        filter_criteria = {}

        # build filter_criteria if unique field(s) have been set up for this silo
        for unique_field in unique_fields:
            try:
                filter_criteria.update(
                    {unique_field.name: row[headers.index(unique_field.name)]})
            except KeyError:
                pass
            except ValueError:
                pass

        if filter_criteria:
            filter_criteria.update({'silo_id': silo.id})

            # if a row is found, then fetch and update it;
            # if no row is found, then create a new one;
            # if multiple rows are found, skip because it is not clear which one to update
            try:
                lvs = LabelValueStore.objects.get(**filter_criteria)
                lvs.edit_date = timezone.now()
            except LabelValueStore.DoesNotExist:
                lvs = LabelValueStore()
            except LabelValueStore.MultipleObjectsReturned:
                for k, v in filter_criteria.iteritems():
                    skipped_rows.add("%s=%s" % (k, v))
                continue
        else:
            lvs = LabelValueStore()

        for c, col in enumerate(row):
            try:
                key = headers[c]
            except IndexError:
                # this happens when a column header is missing in the gsheet
                continue
            if key == "" or key is None or key == "silo_id":
                continue
            elif key == "id" or key == "_id":
                key = "user_assigned_id"
            elif key == "edit_date":
                key = "editted_date"
            elif key == "create_date":
                key = "created_date"
            val = smart_str(row[c], strings_only=True)
            key = smart_str(key)
            setattr(lvs, key.replace(".", "_").replace("$", "USD"), val)

        lvs.silo_id = silo.id
        lvs.create_date = timezone.now()
        lvs.save()

    combineColumns(silo.pk)

    if skipped_rows:
        msgs.append({
            "level": messages.WARNING,
            "msg": "Skipped updating/adding records where %s because there are already multiple records." % ",".join(str(s) for s in skipped_rows)
        })

    msgs.append({"level": messages.SUCCESS, "msg": "Operation successful"})
    return msgs
def saveAndImportRead(request):
    """ Saves ONA read if not already in the db and then imports its data """
    if request.method != 'POST':
        return HttpResponseBadRequest("HTTP method, %s, is not supported" % request.method)

    read_type = ReadType.objects.get(read_type="JSON")
    name = request.POST.get('read_name', None)
    url = request.POST.get('read_url', None)
    owner = request.user
    description = request.POST.get('description', None)
    silo_id = None
    read = None
    silo = None
    provider = "ONA"

    try:
        silo_id = int(request.POST.get("silo_id", None))
    except Exception as e:
        print(e)
        return HttpResponse("Silo ID can only be an integer")

    try:
        read, created = Read.objects.get_or_create(
            read_name=name, owner=owner,
            defaults={'read_url': url, 'type': read_type, 'description': description})
        if created:
            read.save()
    except Exception as e:
        print(e)
        return HttpResponse("Invalid name and/or URL")

    # Fetch the data from ONA
    ona_token = ThirdPartyTokens.objects.get(user=request.user, name=provider)
    response = requests.get(read.read_url, headers={'Authorization': 'Token %s' % ona_token.token})
    data = json.loads(response.content)

    existing_silo_cols = []
    new_cols = []
    show_mapping = False

    if silo_id <= 0:
        # create a new silo by the name of "name"
        silo = Silo(name=name, public=False, owner=owner)
        silo.save()
        silo.reads.add(read)
    else:
        # import into an existing silo; compare the columns of the imported data
        # with the existing silo in case it needs merging
        silo = Silo.objects.get(pk=silo_id)
        lvs = json.loads(LabelValueStore.objects(silo_id=silo.id).to_json())
        for l in lvs:
            existing_silo_cols.extend(c for c in l.keys() if c not in existing_silo_cols)

        for row in data:
            new_cols.extend(c for c in row.keys() if c not in new_cols)

        for c in existing_silo_cols:
            if c == "silo_id" or c == "create_date":
                continue
            if c not in new_cols:
                show_mapping = True

        if show_mapping:
            params = {'getSourceFrom': existing_silo_cols,
                      'getSourceTo': new_cols,
                      'from_silo_id': 0,
                      'to_silo_id': silo.id}
            response = render_to_response("display/merge-column-form-inner.html",
                                          params,
                                          context_instance=RequestContext(request))
            response['show_mapping'] = '1'
            return response

    if silo:
        # import data into this silo
        num_rows = len(data)
        # loop over the data, set the create date, and save each row
        for counter, row in enumerate(data):
            lvs = LabelValueStore()
            lvs.silo_id = silo.pk
            for new_label, new_value in row.iteritems():
                if new_label != "" and new_label is not None and new_label not in ("edit_date", "create_date"):
                    setattr(lvs, new_label, new_value)
            lvs.create_date = timezone.now()
            result = lvs.save()

        if num_rows == (counter + 1):
            combineColumns(silo_id)

        return HttpResponse("View silo data at <a href='/silo_detail/%s' target='_blank'>See your data</a>" % silo.pk)

    return HttpResponse(read.pk)
def updateMergeSilo(request, pk):
    silo = None
    mapping = None
    try:
        silo = Silo.objects.get(id=pk)
    except Silo.DoesNotExist:
        return HttpResponse("Table (%s) does not exist" % pk)

    try:
        mapping = MergedSilosFieldMapping.objects.get(merged_silo=silo.pk)
        left_table_id = mapping.from_silo.pk
        right_table_id = mapping.to_silo.pk
        data = mapping.mapping
        # the merged silo itself is the destination of the merge
        merged_data = mergeTwoSilos(data, left_table_id, right_table_id, silo.pk)
        try:
            merged_data['status']
            # the merge helper returns a dict with 'status'/'message' on failure
            messages.error(request, merged_data.get('message', 'Failed to merge the source tables.'))
            return HttpResponseRedirect(reverse_lazy('siloDetail', kwargs={'id': pk},))
        except Exception as e:
            pass

        lvs = LabelValueStore.objects(silo_id=silo.id)
        num_rows_deleted = lvs.delete()

        # put the new silo data in mongo db
        for counter, row in enumerate(merged_data):
            lvs = LabelValueStore()
            lvs.silo_id = silo.pk
            for l, v in row.iteritems():
                if l == 'silo_id' or l == '_id' or l == 'create_date' or l == 'edit_date':
                    continue
                else:
                    setattr(lvs, l, v)
            lvs.create_date = timezone.now()
            result = lvs.save()
    except MergedSilosFieldMapping.DoesNotExist:
        # Check if the silo has a source from ONA and, if so, update its data
        stop = False
        if not silo.unique_fields.all().exists():
            stop = True
            messages.info(request, "In order to update a table, it must have a unique field set.")

        read_type = ReadType.objects.get(read_type="JSON")
        reads = silo.reads.filter(type=read_type.pk)
        for read in reads:
            ona_token = ThirdPartyTokens.objects.get(user=silo.owner.pk, name="ONA")
            response = requests.get(read.read_url, headers={'Authorization': 'Token %s' % ona_token.token})
            data = json.loads(response.content)

            # import data into this silo
            num_rows = len(data)
            if num_rows == 0:
                continue

            counter = None
            # loop over the data, set the create date, and save each row
            for counter, row in enumerate(data):
                skip_row = False
                # if the value of the unique column is already in the existing silo data, skip the row
                for unique_field in silo.unique_fields.all():
                    filter_criteria = {'silo_id': silo.pk, unique_field.name: row[unique_field.name]}
                    if LabelValueStore.objects.filter(**filter_criteria).count() > 0:
                        skip_row = True
                        continue

                if skip_row:
                    continue

                # at this point, the unique column value is not in the existing data, so append it
                lvs = LabelValueStore()
                lvs.silo_id = silo.pk
                for new_label, new_value in row.iteritems():
                    if new_label != "" and new_label is not None and new_label not in ("edit_date", "create_date"):
                        setattr(lvs, new_label, new_value)
                lvs.create_date = timezone.now()
                result = lvs.save()

            if num_rows == (counter + 1):
                combineColumns(silo.pk)

        # reset num_rows
        num_rows = 0

        read_types = ReadType.objects.filter(Q(read_type="GSheet Import") | Q(read_type="Google Spreadsheet"))
        reads = silo.reads.filter(reduce(or_, [Q(type=read.id) for read in read_types]))
        for read in reads:
            # get a gsheet-authorized client and the gsheet id to fetch its data into the silo
            storage = Storage(GoogleCredentialsModel, 'id', silo.owner, 'credential')
            credential = storage.get()
            # check the credential before serializing it; a missing or invalid
            # credential cannot be converted to JSON
            if credential is None or credential.invalid == True:
                messages.error(request, "There was a Google credential problem with user: %s for gsheet %s" % (request.user, read.pk))
                continue
            credential_json = json.loads(credential.to_json())
            suc = import_from_google_spreadsheet(credential_json, silo, read.resource_id)
            if not suc:
                messages.error(request, "Failed to import data from gsheet %s " % read.pk)

        if not reads:
            stop = True
            messages.info(request, "Tables that only have a CSV source cannot be updated.")

    return HttpResponseRedirect(reverse_lazy('siloDetail', kwargs={'id': pk},))
def mergeTwoSilos(mapping_data, lsid, rsid, msid):
    """
    @params
        mapping_data: data that describes how mapping is done between two silos
        lsid: Left Silo ID
        rsid: Right Silo ID
        msid: Merge Silo ID
    """
    mappings = json.loads(mapping_data)

    l_unmapped_cols = mappings.pop('left_unmapped_cols')
    r_unmapped_cols = mappings.pop('right_unmapped_cols')

    merged_cols = []
    #print("lsid:%s rsid:%s msid:%s" % (lsid, rsid, msid))

    l_silo_data = LabelValueStore.objects(silo_id=lsid)
    r_silo_data = LabelValueStore.objects(silo_id=rsid)

    # Loop through the mapped cols and add them to the list of merged_cols
    for k, v in mappings.iteritems():
        col_name = v['right_table_col']
        if col_name == "silo_id" or col_name == "create_date":
            continue
        if col_name not in merged_cols:
            merged_cols.append(col_name)

    for left_col in l_unmapped_cols:
        if left_col not in merged_cols:
            merged_cols.append(left_col)

    for right_col in r_unmapped_cols:
        if right_col not in merged_cols:
            merged_cols.append(right_col)

    # retrieve the left silo
    try:
        lsilo = Silo.objects.get(pk=lsid)
    except Silo.DoesNotExist:
        msg = "Left Silo does not exist: silo_id=%s" % lsid
        logger.error(msg)
        return {'status': "danger", 'message': msg}

    # retrieve the right silo
    try:
        rsilo = Silo.objects.get(pk=rsid)
    except Silo.DoesNotExist:
        msg = "Right Table does not exist: table_id=%s" % rsid
        logger.error(msg)
        return {'status': "danger", 'message': msg}

    # retrieve the merged silo
    try:
        msilo = Silo.objects.get(pk=msid)
    except Silo.DoesNotExist:
        msg = "Merged Table does not exist: table_id=%s" % msid
        logger.error(msg)
        return {'status': "danger", 'message': msg}

    # retrieve the unique fields set for the right silo
    r_unique_fields = rsilo.unique_fields.all()
    if not r_unique_fields:
        msg = "The table, [%s], must have a unique column and it should be the same as the one specified in [%s] table." % (rsilo.name, lsilo.name)
        logger.error(msg)
        return {'status': "danger", 'message': msg}

    # retrieve the unique fields of the merged_silo
    m_unique_fields = msilo.unique_fields.all()

    # make sure that the unique_fields from the right table are in the merged_table
    # by adding them to the merged_cols array
    for uf in r_unique_fields:
        if uf.name not in merged_cols:
            merged_cols.append(uf.name)

        # make sure to set the same unique_fields in the merged_table
        if not m_unique_fields.filter(name=uf.name).exists():
            unique_field, created = UniqueFields.objects.get_or_create(
                name=uf.name, silo=msilo, defaults={"name": uf.name, "silo": msilo})

    # Get the correct set of data from the right table
    for row in r_silo_data:
        merged_row = OrderedDict()
        for k in row:
            # Skip over those columns in the right table that shouldn't be in the merged_table
            if k not in merged_cols:
                continue
            merged_row[k] = row[k]

        # now set its silo_id to the merged_table id
        merged_row["silo_id"] = msid
        merged_row["create_date"] = timezone.now()

        filter_criteria = {}
        for uf in r_unique_fields:
            try:
                filter_criteria.update({str(uf.name): str(merged_row[uf.name])})
            except KeyError:
                # when this exception occurs, it means that the col identified
                # as the unique_col is not present in all rows of the right_table
                logger.warning("The field, %s, is not present in table id=%s" % (uf.name, rsid))

        # adding the merged_table id because the filter criteria should search the merged_table
        filter_criteria.update({'silo_id': msid})

        # this is an upsert operation; note the upsert=True
        db.label_value_store.update_one(filter_criteria, {"$set": merged_row}, upsert=True)

    # Retrieve the unique_fields set by the left table
    l_unique_fields = lsilo.unique_fields.all()
    if not l_unique_fields:
        msg = "The table, [%s], must have a unique column and it should be the same as the one specified in [%s] table." % (lsilo.name, rsilo.name)
        logger.error(msg)
        return {'status': "danger", 'message': msg}

    for uf in l_unique_fields:
        # if there are unique fields that are not in the right table then show an error
        if not r_unique_fields.filter(name=uf.name).exists():
            msg = "Both tables (%s, %s) must have the same column set as unique fields" % (lsilo.name, rsilo.name)
            logger.error(msg)
            return {"status": "danger", "message": msg}

    # now loop through the left table and apply the mapping
    for row in l_silo_data:
        merged_row = OrderedDict()

        # Loop through the column mappings for each row in the left_table
        for k, v in mappings.iteritems():
            merge_type = v['merge_type']
            left_cols = v['left_table_cols']
            right_col = v['right_table_col']

            # if merge_type is specified then there must be multiple columns in the left_cols array
            if merge_type:
                mapped_value = ''
                for col in left_cols:
                    if merge_type == 'Sum' or merge_type == 'Avg':
                        try:
                            if mapped_value == '':
                                mapped_value = float(row[col])
                            else:
                                mapped_value = float(mapped_value) + float(row[col])
                        except Exception as e:
                            msg = 'Failed to apply %s to column, %s : %s ' % (merge_type, col, e.message)
                            logger.error(msg)
                            return {'status': "danger", 'message': msg}
                    else:
                        mapped_value += ' ' + smart_str(row[col])

                # Now calculate the average if the merge_type was actually "Avg"
                if merge_type == 'Avg':
                    mapped_value = mapped_value / len(left_cols)

            # only one col in the left table is mapped to one col in the right table
            else:
                col = str(left_cols[0])
                if col == "silo_id":
                    continue
                try:
                    mapped_value = row[col]
                except KeyError:
                    # When updating data in the merged_table at a later time, it is possible
                    # the original source tables may have had some columns removed, in which
                    # case we might get a KeyError, so we just skip it.
                    continue

            # right_col is used as an index of merged_row because one or more left cols map to one col in the right table
            merged_row[right_col] = mapped_value

        # Get data from left unmapped columns
        for col in l_unmapped_cols:
            if col in row:
                merged_row[col] = row[col]

        filter_criteria = {}
        for uf in l_unique_fields:
            try:
                filter_criteria.update({str(uf.name): str(merged_row[uf.name])})
            except KeyError:
                # when this exception occurs, it means that the col identified
                # as the unique_col is not present in all rows of the left_table
                msg = "The field, %s, is not present in table id=%s" % (uf.name, lsid)
                logger.warning(msg)

        filter_criteria.update({'silo_id': msid})

        # override the silo_id and create_date column values to make sure they're not set
        # to the values that are in the left table or right table
        merged_row["silo_id"] = msid
        merged_row["create_date"] = timezone.now()

        # Now update, or insert a row if there is no matching record available
        res = db.label_value_store.update_one(filter_criteria, {"$set": merged_row}, upsert=True)

    # Make sure all rows have the same cols in the merged_silo
    combineColumns(msid)

    return {'status': "success", 'message': "Merged data successfully"}
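# Illustrative only (not part of the original module): a hedged sketch of the
# mapping_data JSON that mergeTwoSilos() above and appendTwoSilos() below parse.
# The structure is reconstructed from the code: 'left_unmapped_cols' and
# 'right_unmapped_cols' are popped off first, and every remaining entry maps one
# or more left-table columns onto a right-table column. merge_type may be "Sum",
# "Avg", empty (a one-to-one column mapping), or any other non-empty value, which
# concatenates the left-column values. The table ids and column names below are
# hypothetical.
EXAMPLE_MAPPING_DATA = json.dumps({
    "left_unmapped_cols": ["notes"],
    "right_unmapped_cols": ["region"],
    "0": {"left_table_cols": ["first_name"], "right_table_col": "first_name", "merge_type": ""},
    "1": {"left_table_cols": ["q1_score", "q2_score"], "right_table_col": "avg_score", "merge_type": "Avg"},
})
# e.g. result = mergeTwoSilos(EXAMPLE_MAPPING_DATA, lsid=1, rsid=2, msid=3)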
def appendTwoSilos(mapping_data, lsid, rsid, msid):
    """
    @params
        mapping_data: data that describes how mapping is done between two silos
        lsid: Left Silo ID
        rsid: Right Silo ID
        msid: Merge Silo ID
    """
    mappings = json.loads(mapping_data)

    l_unmapped_cols = mappings.pop('left_unmapped_cols')
    r_unmapped_cols = mappings.pop('right_unmapped_cols')

    merged_cols = []
    #print("lsid:%s rsid:%s msid:%s" % (lsid, rsid, msid))

    l_silo_data = LabelValueStore.objects(silo_id=lsid)
    r_silo_data = LabelValueStore.objects(silo_id=rsid)

    # Loop through the mapped cols and add them to the list of merged_cols
    for k, v in mappings.iteritems():
        col_name = v['right_table_col']
        if col_name == "silo_id" or col_name == "create_date":
            continue
        if col_name not in merged_cols:
            merged_cols.append(col_name)

    for left_col in l_unmapped_cols:
        if left_col not in merged_cols:
            merged_cols.append(left_col)

    for right_col in r_unmapped_cols:
        if right_col not in merged_cols:
            merged_cols.append(right_col)

    # retrieve the left silo
    try:
        lsilo = Silo.objects.get(pk=lsid)
    except Silo.DoesNotExist:
        msg = "Table id=%s does not exist." % lsid
        logger.error(msg)
        return {'status': "danger", 'message': msg}

    # retrieve the right silo
    try:
        rsilo = Silo.objects.get(pk=rsid)
    except Silo.DoesNotExist:
        msg = "Right Table does not exist: table_id=%s" % rsid
        logger.error(msg)
        return {'status': "danger", 'message': msg}

    # retrieve the merged silo
    try:
        msilo = Silo.objects.get(pk=msid)
    except Silo.DoesNotExist:
        msg = "Merged Table does not exist: table_id=%s" % msid
        logger.error(msg)
        return {'status': "danger", 'message': msg}

    # Delete any existing data from the merged_table
    deleted_res = db.label_value_store.delete_many({"silo_id": msid})

    # Get the correct set of data from the right table
    for row in r_silo_data:
        merged_row = OrderedDict()
        for k in row:
            # Skip over those columns in the right table that shouldn't be in the merged_table
            if k not in merged_cols:
                continue
            merged_row[k] = row[k]

        # now set its silo_id to the merged_table id
        merged_row["silo_id"] = msid
        merged_row["create_date"] = timezone.now()
        db.label_value_store.insert_one(merged_row)

    # now loop through the left table and apply the mapping
    for row in l_silo_data:
        merged_row = OrderedDict()

        # Loop through the column mappings for each row in the left_table
        for k, v in mappings.iteritems():
            merge_type = v['merge_type']
            left_cols = v['left_table_cols']
            right_col = v['right_table_col']

            # if merge_type is specified then there must be multiple columns in the left_cols array
            if merge_type:
                mapped_value = ''
                for col in left_cols:
                    if merge_type == 'Sum' or merge_type == 'Avg':
                        try:
                            if mapped_value == '':
                                mapped_value = float(row[col])
                            else:
                                mapped_value = float(mapped_value) + float(row[col])
                        except Exception as e:
                            msg = 'Failed to apply %s to column, %s : %s ' % (merge_type, col, e.message)
                            logger.error(msg)
                            return {'status': "danger", 'message': msg}
                    else:
                        mapped_value += ' ' + smart_str(row[col])

                # Now calculate the average if the merge_type was actually "Avg"
                if merge_type == 'Avg':
                    mapped_value = mapped_value / len(left_cols)

            # only one col in the left table is mapped to one col in the right table
            else:
                col = str(left_cols[0])
                if col == "silo_id":
                    continue
                try:
                    mapped_value = row[col]
                except KeyError:
                    # When updating data in the merged_table at a later time, it is possible
                    # the original source tables may have had some columns removed, in which
                    # case we might get a KeyError, so we just skip it.
                    continue

            # right_col is used as an index of merged_row because one or more left cols map to one col in the right table
            merged_row[right_col] = mapped_value

        # Get data from left unmapped columns
        for col in l_unmapped_cols:
            if col in row:
                merged_row[col] = row[col]

        merged_row["silo_id"] = msid
        merged_row["create_date"] = timezone.now()
        db.label_value_store.insert_one(merged_row)

    combineColumns(msid)

    return {'status': "success", 'message': "Appended data successfully"}