def importDatabase(filename, user):
    '''Imports the uploaded excel file to the database, then deletes the excel file.'''
    df = pd.read_excel(
        os.path.join(current_app.config['UPLOAD_FOLDER'], filename))
    for index, row in df.iterrows():
        new_patient = Patient(user_id=user, status="undiag",
                              diagnose=str(row[3]))
        featureA = Feature(featureName='A', featureValue=str(row[0]),
                           classifier_id=1)
        featureB = Feature(featureName='B', featureValue=str(row[1]),
                           classifier_id=1)
        featureC = Feature(featureName='C', featureValue=str(row[2]),
                           classifier_id=1)
        new_patient.features.append(featureA)
        new_patient.features.append(featureB)
        new_patient.features.append(featureC)
        db.session.add(new_patient)
    db.session.commit()
    os.remove(os.path.join(current_app.config['UPLOAD_FOLDER'], filename))
def test18_geometryfield(self):
    "Testing the general GeometryField."
    Feature(name='Point', geom=Point(1, 1)).save()
    Feature(name='LineString', geom=LineString((0, 0), (1, 1), (5, 5))).save()
    Feature(name='Polygon', geom=Polygon(
        LinearRing((0, 0), (0, 5), (5, 5), (5, 0), (0, 0)))).save()
    Feature(name='GeometryCollection',
            geom=GeometryCollection(
                Point(2, 2),
                LineString((0, 0), (2, 2)),
                Polygon(LinearRing((0, 0), (0, 5), (5, 5), (5, 0), (0, 0))))).save()

    f_1 = Feature.objects.get(name='Point')
    self.assertEqual(True, isinstance(f_1.geom, Point))
    self.assertEqual((1.0, 1.0), f_1.geom.tuple)
    f_2 = Feature.objects.get(name='LineString')
    self.assertEqual(True, isinstance(f_2.geom, LineString))
    self.assertEqual(((0.0, 0.0), (1.0, 1.0), (5.0, 5.0)), f_2.geom.tuple)
    f_3 = Feature.objects.get(name='Polygon')
    self.assertEqual(True, isinstance(f_3.geom, Polygon))
    f_4 = Feature.objects.get(name='GeometryCollection')
    self.assertEqual(True, isinstance(f_4.geom, GeometryCollection))
    self.assertEqual(f_3.geom, f_4.geom[2])
def load_features_csv(self, input_file, file_id):
    features = []
    if not os.path.exists(input_file):
        return features
    delim = self.detect_delimiter(input_file)
    with open(input_file, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=delim)
        next(reader, None)  # skip the headers
        for elements in reader:
            # 4-column rows are id, m/z, RT, intensity; 5-column rows carry an
            # extra identification string, which is unused here. Rows of any
            # other width are skipped (the original would append an unbound or
            # stale `feature` for them).
            if len(elements) in (4, 5):
                feature_id = utils.num(elements[0])
                mz = utils.num(elements[1])
                rt = utils.num(elements[2])
                intensity = utils.num(elements[3])
                feature = Feature(feature_id, mz, rt, intensity, file_id)
                features.append(feature)
    return features
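# Usage sketch for load_features_csv, added for illustration only. The file
# name is hypothetical, and `loader` stands in for whatever object owns
# load_features_csv and detect_delimiter; the CSV layout (a header row, then
# feature id, m/z, RT, intensity, optional identification) is inferred from
# the parsing code above.
def _example_load_features_csv(loader):
    # returns a list of Feature objects, one per well-formed row
    return loader.load_features_csv('features_file1.csv', file_id=0)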
def vector_catalog_save_layer(tenant, layer, vector_layer, features):
    connection.close()
    connection.set_schema(tenant)
    features = VECTOR_LAYERS[vector_layer]['geometries_by_id'](features)

    with transaction.atomic():
        union = GEOSGeometry('POINT EMPTY')
        keys = None
        for g, props in features:
            if not keys:
                keys = props.keys()
            union = union.union(g)
            g.transform(3857)
            # hash the geometry so each feature gets a stable content id
            s = hashlib.sha1()
            s.update(GeometryCollection(g).ewkb)
            props['shaid'] = s.hexdigest()
            f = Feature(layer=layer,
                        geometry=GeometryCollection(g),
                        properties=props)
            f.save()

        envelope = union.envelope.coords[0]
        layer.bounds = envelope[2] + envelope[0]
        layer.status = 0
        layer.field_names = list(set(layer.field_names).union(set(keys)))
        layer.schema['properties'] = {n: "str" for n in layer.field_names}
        layer.save()
def process_shapefile(tenant, layer_id, srs):
    connection.close()
    connection.set_schema(tenant)
    l = Layer.objects.get(pk=layer_id)
    shape_path = "%s/uploads/shapefile/%s/%s.shp" % (settings.MEDIA_ROOT, tenant, l.pk)
    try:
        with fiona.open(shape_path, 'r') as collection:
            count = 0
            min_bounds = OGRGeometry('POINT ({} {})'.format(
                collection.bounds[0], collection.bounds[1]),
                srs=srs).transform(4326, clone=True)
            max_bounds = OGRGeometry('POINT ({} {})'.format(
                collection.bounds[2], collection.bounds[3]),
                srs=srs).transform(4326, clone=True)
            l.bounds = min_bounds.coords + max_bounds.coords
            features = []
            for index, record in enumerate(collection):
                try:
                    geom = shape(record['geometry'])
                    transformed_geom = OGRGeometry(
                        geom.wkt, srs=srs).transform(3857, clone=True)
                    transformed_geom_collection = GeometryCollection(
                        transformed_geom.geos)
                    s = hashlib.sha1()
                    s.update(transformed_geom_collection.ewkb)
                    properties = record['properties']
                    properties['fid'] = index
                    properties['shaid'] = s.hexdigest()
                    features.append(
                        Feature(layer=l,
                                geometry=transformed_geom_collection,
                                properties=properties))
                    count += 1
                except Exception as e:
                    print "Feature exception", e

            if count == 0:
                raise Exception("Layer needs to have at least one feature")

            Feature.objects.bulk_create(features)
            field_names = collection.schema['properties'].keys()
            field_names.append("fid")
            l.field_names = field_names
            l.properties = collection.schema['properties']
            l.schema = collection.schema
            l.status = 0
            l.save()
    finally:
        # remove every component of the upload (.shp, .shx, .dbf, ...)
        for path in glob.glob("%s/uploads/shapefile/%s/%s.*"
                              % (settings.MEDIA_ROOT, tenant, l.pk)):
            os.remove(path)
def post(self, request, *args, **kwargs):
    feature = None
    tpi = request.META.get('HTTP_TEMP_POINT_ID', "none")
    aoi = request.POST.get('aoi')
    geometry = request.POST.get('geometry')
    geojson = json.loads(geometry)
    properties = geojson.get('properties')

    aoi = AOI.objects.get(id=aoi)
    job = getattr(aoi, 'job')
    project = getattr(job, 'project')
    template = properties.get('template') if properties else None

    # TODO: handle exceptions
    if template:
        template = FeatureType.objects.get(id=template)

    attrs = dict(aoi=aoi, job=job, project=project,
                 analyst=request.user, template=template)

    geometry = geojson.get('geometry')
    geom_obj = GEOSGeometry(json.dumps(geometry))
    attrs['the_geom'] = geom_obj

    county_list = Counties.objects.filter(
        poly__contains=geom_obj.centroid.wkt)
    county = None
    if len(county_list):
        county = str(county_list[0].name)

    try:
        feature = Feature(**attrs)
        feature.full_clean()
        if not feature.properties:
            feature.properties = {}
        if county:
            feature.properties['county'] = county
        feature.save()
    except ValidationError as e:
        response = HttpResponse(content=json.dumps(dict(errors=e.messages)),
                                mimetype="application/json", status=400)
        response['Temp-Point-Id'] = tpi
        return response

    # This feels a bit ugly but it does get the GeoJSON into the response
    feature_json = serializers.serialize('json', [feature, ])
    feature_list = json.loads(feature_json)
    feature_list[0]['geojson'] = feature.geoJSON(True)

    response = HttpResponse(json.dumps(feature_list),
                            mimetype="application/json")
    response['Temp-Point-Id'] = tpi
    return response
def importDatabase(filename, user):
    '''Imports the uploaded excel file to the database, then deletes the excel file.'''
    df = pd.read_excel(
        os.path.join(current_app.config['UPLOAD_FOLDER'], filename))
    user_id = _request_ctx_stack.top.current_user.get('sub')
    classifier = Classifier.query.filter_by(user_id=user_id).first()

    for index, row in df.iterrows():
        # the last column holds the diagnosis; every other column is a feature
        new_patient = Patient(user_id=user, status="undiag",
                              diagnose=str(row[row.size - 1]))
        for idx, r in enumerate(row):
            if idx != row.size - 1:
                feature = Feature(featureName=df.columns[idx],
                                  featureValue=str(r),
                                  classifier_id=classifier.id)
                new_patient.features.append(feature)
        db.session.add(new_patient)
    db.session.commit()

    # record how many distinct feature names this classifier now has
    r = Feature.query.with_entities(
        Feature.featureName).filter_by(classifier_id=classifier.id).distinct()
    classifier.numberOfFeatureTypes = r.count()
    db.session.add(classifier)
    db.session.commit()
    os.remove(os.path.join(current_app.config['UPLOAD_FOLDER'], filename))
def makeFeature(feature):
    ret = Feature(id=int(feature.get('id')),
                  label=feature.get('label'),
                  type=feature.findtext("TYPE"),
                  start=int(feature.findtext("START")),
                  end=int(feature.findtext("END")))
    return ret
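# Usage sketch for makeFeature, added for illustration; the element shape is
# inferred from the .get()/.findtext() accessors above, and only the Feature
# class from the snippet is assumed to exist. The tag names and values below
# are hypothetical.
import xml.etree.ElementTree as ET

def _example_makeFeature():
    xml = ('<FEATURE id="7" label="helix">'
           '<TYPE>HELIX</TYPE><START>12</START><END>34</END></FEATURE>')
    element = ET.fromstring(xml)
    # -> Feature(id=7, label='helix', type='HELIX', start=12, end=34)
    return makeFeature(element)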
def add_feature(data, user):
    feature = Feature()
    feature.name = data[NAME]
    feature.group = data[GROUP]
    feature.updated_by = user
    feature.save()
    return feature
def feature_create():
    if not flask_login.current_user.is_enabled:
        abort(403)

    obj = Feature()
    for k, v in request.json.iteritems():
        setattr(obj, k, v)

    if obj.area_id is None:
        return make_response(
            jsonify({'validationErrors': ['An area should be specified']}), 409)

    user = flask_login.current_user
    obj.client_id = user.client.id
    db.session.add(obj)

    # associate as Supporter; note the max() query below autoflushes the
    # session, which is what assigns obj.id before it is referenced
    max_priority = db.session.query(func.max(Supporter.priority)) \
        .filter(Supporter.client_id == user.client.id).first()
    supporter = Supporter()
    supporter.client_id = user.client.id
    supporter.feature_id = obj.id
    supporter.priority = 0 if max_priority[0] is None else (max_priority[0] + 1)
    db.session.add(supporter)
    db.session.commit()

    return jsonify({
        'id': obj.id,
        'msgType': 'info',
        'msgText': 'Feature created'
    })
def post(self, request, *args, **kwargs):
    feature = None
    aoi = request.POST.get('aoi')
    geometry = request.POST.get('geometry')
    geojson = json.loads(geometry)
    properties = geojson.get('properties')

    aoi = AOI.objects.get(id=aoi)
    job = getattr(aoi, 'job')
    project = getattr(job, 'project')
    template = properties.get('template') if properties else None

    # TODO: handle exceptions
    if template:
        template = FeatureType.objects.get(id=template)

    attrs = dict(aoi=aoi, job=job, project=project,
                 analyst=request.user, template=template)

    geometry = geojson.get('geometry')
    attrs['the_geom'] = GEOSGeometry(json.dumps(geometry))

    try:
        response = Feature(**attrs)
        response.full_clean()
        response.save()
    except ValidationError as e:
        return HttpResponse(content=json.dumps(dict(errors=e.messages)),
                            mimetype="application/json", status=400)
    return HttpResponse([response], mimetype="application/json")
def test_basic_validation(self):
    feature = Feature(
        account=self.account,
        name="Crm.Business",
    )
    # full_clean() must reject the dot in the name; using assertRaises makes
    # the test fail if no ValidationError is raised (the original try/except
    # would silently pass in that case)
    with self.assertRaises(ValidationError) as cm:
        feature.full_clean()
    self.assertEqual(cm.exception.message_dict,
                     {'__all__': [u'Name must start with a letter and contain '
                                  u'only letters, numbers and underscore.']})
def post(self):
    new_feature = []
    feature_name = self.request.get("feature-name")
    if feature_name != "":
        new_feature = Feature(
            parent=ndb.Key(urlsafe=self.request.get("idea-key")),
            title=feature_name)
        new_feature.put()
    self.redirect(self.request.referer)
def create_feature():
    if not request.json:
        abort(400)
    feature = Feature(title=request.json['title'],
                      description=request.json['description'],
                      client_id=request.json['client_id'],
                      client_priority=request.json['client_priority'],
                      target_date=request.json['target_date'],
                      product_area_id=request.json['product_area_id'])
    session.add(feature)
    session.commit()
    return jsonify({'message': 'Added feature'}), 201
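# Illustrative client call for the Flask endpoint above, added for clarity.
# The route path, port, and payload values are assumptions; the JSON keys
# mirror exactly what create_feature reads.
import requests

def _example_create_feature():
    payload = {
        'title': 'Export to CSV',
        'description': 'Allow exporting reports as CSV',
        'client_id': 1,
        'client_priority': 2,
        'target_date': '2020-06-01',
        'product_area_id': 3,
    }
    resp = requests.post('http://localhost:5000/features', json=payload)
    assert resp.status_code == 201  # body: {'message': 'Added feature'}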
def _set_features(self, test_mode=False):
    assigned_genes_limit = self.exp_setting.get_assigned_genes_limit()
    for idx, feature in enumerate(self.features):
        if feature is not None:
            # get gnids
            # TODO: cover for fsid == 0, which is the case of small assigned genes at random
            if self.exp_setting.get_fsid() == 0:  # small gene mode (test mode)
                feature_name = 'SAG_RND_t%02d' % (idx + 1)
                corresp_tissue = idx + 1
                pars = ('g', corresp_tissue, '0,0', assigned_genes_limit[idx])
                random_assigned_gene = Pgsql.Common.select_data(
                    sqls.get_gene_tissues_random, pars)
                assigned_gene = Pgsql.Common.to_list(random_assigned_gene)
            else:
                res = Pgsql.Common.select_data(
                    sqls.get_features_info_by_fsid_corresp_tissue,
                    (self.exp_setting.get_fsid(), idx + 1,))
                feature_name = res[0][0]
                assigned_genes_str = res[0][1]
                corresp_tissue = res[0][2]
                assigned_gene = list(map(int, assigned_genes_str.split(',')))

            if self.gnid_min_max:
                # keep only genes whose gnid falls inside the configured range
                assigned_gene = [
                    gnid for gnid in assigned_gene
                    if self.gnid_min_max[0] <= gnid <= self.gnid_min_max[1]
                ]

            self.features[idx] = Feature(
                name=feature_name,
                class_size=self.exp_setting.get_class_size(),
                assigned_genes=assigned_gene,
                corresp_tissue=corresp_tissue)
def map_features(map_id):
    m = db.session.query(Map).get(map_id)
    if not m:
        abort(404)

    if request.method == 'POST':
        if not request.json:
            abort(400)
        feature = Feature(request.json)
        m.features.append(feature)
        db.session.add(m)
        db.session.commit()
        return jsonify(feature.to_dict())

    features = [f.to_dict() for f in m.features]
    return jsonify(FeatureCollection(features))
def edit(id=0):
    setExits()
    id = cleanRecordID(id)
    if id < 0:
        flash("That is not a valid ID")
        return redirect(g.listURL)

    if db:
        if not request.form:
            # no form object, so send the form page
            # get the Feature record if you can
            rec = None
            if id > 0:
                rec = Feature.query.filter_by(ID=id).first_or_404()
            return render_template('feature/feature_edit.html', rec=rec)

        # have the request form
        if validForm():
            try:
                if int(id) > 0:
                    rec = Feature.query.get(id)
                else:
                    # create a new record stub
                    rec = Feature(request.form['featureClass'],
                                  request.form['featureValue'])
                    db.session.add(rec)
                # update the record
                rec.featureClass = request.form['featureClass']
                rec.featureValue = request.form['featureValue']
                db.session.commit()
                return redirect(url_for('.display'))
            except Exception as e:
                flash(printException('Could not save record. Unknown Error',
                                     "error", e))

        # form not valid - redisplay
        return render_template('feature/feature_edit.html', rec=request.form)
    else:
        flash(printException('Could not open database'), "info")
        return redirect(url_for('.display'))
def post(self, request, *args, **kwargs):
    feature = None
    aoi = request.POST.get('aoi')
    geometry = request.POST.get('geometry')
    geojson = json.loads(geometry)
    properties = geojson.get('properties')

    aoi = AOI.objects.get(id=aoi)
    job = getattr(aoi, 'job')
    project = getattr(job, 'project')
    template = properties.get('template') if properties else None

    # TODO: handle exceptions
    if template:
        template = FeatureType.objects.get(id=template)

    attrs = dict(aoi=aoi, job=job, project=project,
                 analyst=request.user, template=template)

    geometry = geojson.get('geometry')
    attrs['the_geom'] = GEOSGeometry(json.dumps(geometry))

    try:
        feature = Feature(**attrs)
        feature.full_clean()
        feature.save()
    except ValidationError as e:
        return HttpResponse(content=json.dumps(dict(errors=e.messages)),
                            mimetype="application/json", status=400)

    # This feels a bit ugly but it does get the GeoJSON into the response
    feature_json = serializers.serialize('json', [feature, ])
    feature_list = json.loads(feature_json)
    feature_list[0]['geojson'] = feature.geoJSON(True)
    return HttpResponse(json.dumps(feature_list), mimetype="application/json")
def load_features_sima(self, input_file, file_id):
    features = []
    if not os.path.exists(input_file):
        return features

    with open(input_file, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter='\t')
        feature_id = 1
        for elements in reader:
            mass = float(elements[0])
            charge = float(elements[1])
            mass = mass / charge  # convert mass to m/z
            intensity = utils.num(elements[2])
            rt = utils.num(elements[3])
            feature = Feature(feature_id, mass, rt, intensity, file_id)
            if len(elements) > 4:
                # extra ground-truth columns, for debugging with synthetic data
                gt_peak_id = utils.num(elements[4])
                gt_metabolite_id = utils.num(elements[5])
                gt_adduct_type = elements[6]
                feature.gt_metabolite = gt_metabolite_id
                feature.gt_adduct = gt_adduct_type
            features.append(feature)
            feature_id = feature_id + 1
    return features
def _match_precursor_bins(self, file_data, mass_tol, rt_tol):
    if self.verbose:
        print "Matching precursor bins"
        sys.stdout.flush()

    # check we aren't missing any features
    input_features_count = 0
    for j in range(len(self.data_list)):
        features = self.data_list[j].features
        input_features_count += len(features)

    alignment_files = []
    alignment_feature_to_precursor_cluster = {}
    clustered_features_count = 0
    for j in range(len(self.data_list)):
        file_clusters = file_data[j]
        # TODO: use the posterior mass and RTs (cluster.mu_mass, cluster.mu_rt)
        # instead; for now just use the average over all the members
        file_post_masses = []
        file_post_rts = []
        for cluster in file_clusters:
            assert len(cluster.best_clustering) > 0, 'Empty cluster detected!!'
            avg_mass = np.array([poss.transformed_mass
                                 for peak, poss in cluster.best_clustering]).mean()
            avg_rt = np.array([peak.rt
                               for peak, poss in cluster.best_clustering]).mean()
            file_post_masses.append(avg_mass)
            file_post_rts.append(avg_rt)
            clustered_features_count += len(cluster.best_clustering)
        file_post_fingerprints = [cluster.word_counts for cluster in file_clusters]

        this_file = AlignmentFile("file_" + str(j), self.verbose)
        peak_id = 0
        row_id = 0
        for n in range(len(file_clusters)):
            cluster = file_clusters[n]
            mass = file_post_masses[n]
            rt = file_post_rts[n]
            intensity = 0
            fingerprint = file_post_fingerprints[n]

            # initialise alignment feature
            alignment_feature = Feature(peak_id, mass, rt, intensity, this_file,
                                        fingerprint=fingerprint)
            alignment_feature_to_precursor_cluster[alignment_feature] = cluster

            # initialise row
            alignment_row = AlignmentRow(row_id)
            alignment_row.features.append(alignment_feature)

            peak_id = peak_id + 1
            row_id = row_id + 1
            this_file.rows.append(alignment_row)
        alignment_files.append(this_file)

    # do the matching
    Options = namedtuple('Options', 'dmz drt exact_match verbose use_fingerprint')
    my_options = Options(dmz=mass_tol, drt=rt_tol, exact_match=False,
                         verbose=self.verbose, use_fingerprint=False)
    matched_results = AlignmentFile("", True)
    num_files = len(alignment_files)
    input_count = 0
    output_count = 0
    for i in range(num_files):
        if self.verbose:
            print "Processing file %d" % i
        alignment_file = alignment_files[i]
        input_count += len(alignment_file.get_all_features()) + \
            len(matched_results.get_all_features())
        matched_results.reset_aligned_status()
        alignment_file.reset_aligned_status()
        matcher = MaxWeightedMatching(matched_results, alignment_file, my_options)
        matched_results = matcher.do_matching()
        output_count += len(matched_results.get_all_features())
        assert input_count == output_count, "input %d output %d" % (input_count, output_count)

    # map the results back to the original bin objects
    results = []
    for row in matched_results.rows:
        temp = []
        for alignment_feature in row.features:
            cluster = alignment_feature_to_precursor_cluster[alignment_feature]
            temp.append(cluster)
        tup = tuple(temp)
        results.append(tup)

    # turn this into a matching of peak features
    total_aligned_features = 0
    alignment_results = []
    for bin_res in results:
        matched_list = self._match_adduct_features(bin_res)
        for features in matched_list:
            total_aligned_features += len(features)
            res = AlignmentResults(peakset=features, prob=1.0)
            alignment_results.append(res)

    assert input_features_count == clustered_features_count
    assert input_features_count == total_aligned_features
    return alignment_results
def execute_gene(self, feature_rows, strain_id):
    features = {}
    sequence = None
    transcript = None
    gene_id = None
    min_start = None
    max_end = None

    # Loop through annotation rows in the gff file, all related to the current gene
    for feature_row in feature_rows:
        # keep track of start and end
        start = feature_row[3]
        end = feature_row[4]
        direction = "forward" if feature_row[6] == "+" else "reverse"
        chromosome_id = feature_row[0]
        feature_type = feature_row[2]
        attribs = feature_row[8].strip()

        # This causes bugs.
        # if feature_type == "gene":  # Handle gene entries
        #     gene_id = attribs.split(";")[0].split(":")[1]  # grab the gene ID - we'll want this for later
        new_gene_id = self.find_attribs_value("ID=Gene", attribs)
        if new_gene_id is not None:
            # only deal with proper genes. setting gene_id to None means nothing
            # else will be processed, so it will essentially skip non-"gene" entries.
            if feature_type != "gene":
                gene_id = None
                continue

            # Check against filter list if there is one
            if self.filter_genes is not None and new_gene_id not in self.filter_genes:
                # filter list exists, and gene is not in filter list: skip this gene
                return

            gene_id = new_gene_id

            # add the Gene entry - if it hasn't been already
            if gene_id not in self.genes_seen:
                gene = Gene(gene_id)
                self.genes_to_write.append(gene)
                self.genes_seen[gene_id] = gene

        elif gene_id is not None:
            # Handle transcript entries - if the gene is legit
            transcript_id = self.find_attribs_value("ID=Transcript", attribs)
            if transcript_id is not None:
                # it's a transcript entry
                # add the Transcript entry - if it hasn't been already
                transcript_id = self.ensure_unique_transcript_id(transcript_id)
                if transcript_id not in self.transcripts_seen:
                    transcript = Transcript(id=transcript_id, gene_id=gene_id)
                    self.transcripts_to_write.append(transcript)
                    self.transcripts_seen[transcript.id] = transcript
            else:
                # Handle transcript feature entries. For some reason, features for
                # a given strain/transcript combination are not always added.
                transcript_id = self.find_attribs_value("Parent=Transcript", attribs)
                if transcript_id is not None:
                    # it's a transcript feature entry
                    # put a filter here? some elements are not worth storing?
                    self.features_to_write.append(Feature(
                        transcript_id=transcript_id,
                        type_id=feature_row[2],
                        strain_id=strain_id,
                        chromosome_id=chromosome_id,
                        start=start,
                        end=end,
                        direction=direction))
                else:
                    # this happens for pseudogenes and TEs - which we aren't interested in
                    pass
def _feature_individual(cls, genotype: Genotype,
                        feature_function: Callable[[Genotype], int],
                        index: int = 0):
    return Feature(
        index=index,
        number=feature_function(genotype)
    )
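# Hedged usage sketch: any Genotype -> int callable works as feature_function.
# `FeatureExtractor` as the owning class and the `genes` attribute on Genotype
# are assumptions for illustration only; they do not appear in the source.
def _example_feature_individual(genotype: Genotype) -> Feature:
    return FeatureExtractor._feature_individual(
        genotype,
        feature_function=lambda g: len(g.genes),  # assumed Genotype attribute
        index=3,
    )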
def save_features(request, strain_id):
    """
    seqid = line[0]
    source = line[1]
    type = line[2]
    start = line[3]
    end = line[4]
    score = line[5]
    strand = line[6]  # "+", "-", or "."
    phase = line[7]
    attributes = line[8]
    """
    theStrain = Strain.objects.get(pk=strain_id)
    gffFile = request.FILES['gff_file']
    gff = gffFile.read()
    gff = gff.split('###')[0]  # Throw away the sequence
    # Throw away the header comments. Now we're left with just the meat of the file
    gff = [x.split('\t') for x in gff.splitlines() if x[0] != '#']

    contigMap = {}
    for seqid, source, featureType, start, end, score, strand, phase, attributes in gff:
        # parse "key=v1,v2" attribute pairs, URL-decoding each value
        attributeParts = attributes.split(';')
        attributeParts = [x.split('=') for x in attributeParts]
        attributeParts = [(x[0], x[1].split(',')) for x in attributeParts]
        attributeParts = [(x[0], [urllib.unquote(y) for y in x[1]])
                          for x in attributeParts]
        attributeDict = {}
        for key, value in attributeParts:
            attributeDict[key] = value

        if featureType == 'contig':
            # We need to add this to the contigMap
            try:
                contigName = attributeDict['dbxref'][0].split(':')[-1]
            except KeyError:
                contigName = attributeDict['ID'][0]
            contigMap[seqid] = contigName
        elif featureType == 'chromosome':
            contigMap[seqid] = seqid
        else:
            # This is an actual feature line. It is assumed that we have already
            # gone through all the contig lines.
            # Get the Contig we're going to point to
            theContig = get_object_or_404(Contig, name=contigMap[seqid])
            feature = Feature()
            feature.contig = theContig
            try:
                feature.feature_id = attributeDict['ID'][0]
            except KeyError:
                pass
            else:
                # This one has a name that might be found in the Reference table
                if feature.feature_id.find(theStrain.name) != -1:
                    # Yup, it's one we need to link to the Reference table
                    referenceName = feature.feature_id.split("_")[0]
                    feature.reference = Reference.objects.get(
                        feature_name=referenceName)
                else:
                    # just try and see if there is a reference with this unmodified feature_id
                    try:
                        feature.reference = Reference.objects.get(
                            feature_name=feature.feature_id)
                    except ObjectDoesNotExist:
                        pass

            if 'Parent' in attributeDict:
                parent = get_object_or_404(Feature,
                                           feature_id=attributeDict['Parent'][0],
                                           contig=theContig)
                feature.parent = parent

            feature.feature_type = featureType
            feature.start_coord = int(start)
            feature.stop_coord = int(end)
            feature.strand = strand
            feature.createdDate = datetime.datetime.now()
            feature.modifiedDate = datetime.datetime.now()
            feature.save()

    return HttpResponseRedirect('/strains/')