def get_nearest_obj(self, ra_in, dec_in, size=2):
    a = self.BC.assets
    # Find a bounding box to speed up the search (size depends on the search region).
    if (ra_in < 60) or (ra_in > 308.5):
        if (dec_in + size) < -1:
            size = abs(dec_in + 1)
        elif (dec_in - size) > 1:
            size = abs(dec_in - 1)
    else:
        if (dec_in + size) < -3:
            size = abs(dec_in + 3)
        elif (dec_in - size) > 76:
            size = abs(dec_in - 76)
    box = geo_bounding_box(ra_in, dec_in, size=size)
    # Haversine-style angular distance; correct up to multiplicative factors,
    # which is enough since only the ordering matters.
    dis = func.asin(func.sqrt(
        func.power(func.sin(0.5 * func.radians(dec_in - a.dec)), 2) +
        func.cos(func.radians(dec_in)) * func.cos(func.radians(a.dec)) *
        func.power(func.sin(0.5 * func.radians(ra_in - a.ra)), 2)))
    if isinstance(box[0], list):
        # The search wraps around RA = 360, so the RA filter is a union of two ranges.
        result = self.session.query(a.name, a.id, a.ra, a.dec, a.location).\
            filter(between(a.dec, box[1][0], box[1][1])).\
            filter(between(a.ra, box[0][0][0], box[0][0][1]) |
                   between(a.ra, box[0][1][0], box[0][1][1])).\
            order_by(dis.asc()).first()
    else:
        result = self.session.query(a.name, a.id, a.ra, a.dec, a.location).\
            filter(between(a.ra, box[0][0], box[0][1])).\
            filter(between(a.dec, box[1][0], box[1][1])).\
            order_by(dis.asc()).first()
    if result is None:
        # Nothing inside the box; retry with a much wider search region.
        result = self.get_nearest_obj(ra_in, dec_in, size=size + 20)
    return result
def report_submission_and_queue(args):
    """Count submissions and queue updates.

    Provide a summary of:
    * number of submissions
    * number of queue updates

    """
    with closing(lsession()) as session:
        date_end = args.date_end
        date_start = args.date_start
        args.output.write('/'.join(map(str, [date_start, date_end])) + '\n')

        submissions_query = session.query(Submission).\
            filter(between(Submission.submission_date, date_start, date_end)).\
            filter(Submission.email != '*****@*****.**')
        submissions = submissions_query.count()
        submissions_assimilated = session.query(Submission).\
            filter(between(Submission.submission_date, date_start, date_end)).\
            filter(Submission.email != '*****@*****.**').\
            filter(Submission.assimilated == True).\
            count()
        submission_cruises = session.query(distinct(QueueFile.expocode)).\
            filter(QueueFile.submission_id.in_(
                [x.id for x in submissions_query.all()])).all()
        queued = session.query(QueueFile).\
            filter(between(QueueFile.date_received, date_start, date_end)).\
            filter(QueueFile.merged != 2).\
            count()
        queued_and_merged = session.query(QueueFile).\
            filter(between(QueueFile.date_received, date_start, date_end)).\
            filter(QueueFile.merged == 1).\
            count()

        args.output.write(
            'Submissions from {0}/{1}:\n'.format(date_start, date_end))
        args.output.write('# submissions: {0}\n'.format(submissions))
        args.output.write('# submissions assimilated: {0}\n'.format(
            submissions_assimilated))
        args.output.write('# cruises with submitted files: {0}\n'.format(
            len(submission_cruises)))
        args.output.write('# queued: {0}\n'.format(queued))
        args.output.write('# queued and merged: {0}\n'.format(queued_and_merged))
        args.output.write('# queued and not merged: {0}\n'.format(
            queued - queued_and_merged))
def allUpdatesSource(series=None):
    A = db.sequence_series2_updates.alias(name="A")
    # Find the highest dummy_id so the table can be paged through in chunks.
    if series is None:
        maxIndex = db.connection.execute(
            sql.select((func.max(A.c.dummy_id),))
        ).fetchone()[0]
    else:
        maxIndex = db.connection.execute(
            sql.select((func.max(A.c.dummy_id),)).where(A.c.source == series)
        ).fetchone()[0]
    if maxIndex is None:
        print("No updates found")
        return
    print("Total update records: %d" % maxIndex)

    groupSize = 5000
    if args.stop_at_id > 0:
        maxIndex = args.stop_at_id
    total = 0
    for fromId in range(args.start_from_id, maxIndex, groupSize):
        if series is None:
            q = sql.select((
                A.c.dummy_id,
                A.c.sequence_id,
                func.length(A.c.content))).where(
                    sql.between(A.c.dummy_id, fromId,
                                min(fromId + groupSize - 1, maxIndex))
                ).order_by(A.c.dummy_id)
        else:
            q = sql.select((
                A.c.dummy_id,
                A.c.sequence_id,
                func.length(A.c.content))).where(
                    sql.and_(
                        A.c.source == series,
                        sql.between(A.c.dummy_id, fromId,
                                    min(fromId + groupSize - 1, maxIndex))
                    )).order_by(A.c.dummy_id)
        result = db.connection.execute(q)
        pairRecords = result.fetchall()
        for r in pairRecords:
            yield r
        total += len(pairRecords)
    print("Total: %d" % total)
def sitting_filter(domain_model):
    # Match sittings whose [start_date, end_date] interval overlaps the
    # bound parameter range.
    return sql.or_(
        domain_model.start_date.between(
            sql.bindparam("start_date"), sql.bindparam("end_date")),
        domain_model.end_date.between(
            sql.bindparam("start_date"), sql.bindparam("end_date")),
        sql.between(sql.bindparam("start_date"),
            domain_model.start_date, domain_model.end_date),
        sql.between(sql.bindparam("end_date"),
            domain_model.start_date, domain_model.end_date),
        sql.and_(
            domain_model.start_date <= sql.bindparam("end_date"),
            domain_model.end_date == None),
    )
def session_filter(domain_model):
    return sql.or_(
        domain_model.start_date.between(
            sql.bindparam("start_date"), sql.bindparam("end_date")),
        domain_model.ParliamentSession.end_date.between(
            sql.bindparam("start_date"), sql.bindparam("end_date")),
        sql.between(sql.bindparam("start_date"),
            domain_model.start_date, domain_model.end_date),
        sql.between(sql.bindparam("end_date"),
            domain_model.start_date, domain_model.end_date),
        sql.and_(
            domain_model.start_date <= sql.bindparam("end_date"),
            domain_model.end_date == None),
    )
def group_membership_filter(domain_model):
    return sql.or_(
        domain_model.start_date.between(
            sql.bindparam("start_date"), sql.bindparam("end_date")),
        domain_model.end_date.between(
            sql.bindparam("start_date"), sql.bindparam("end_date")),
        sql.between(sql.bindparam("start_date"),
            domain_model.start_date, domain_model.end_date),
        sql.between(sql.bindparam("end_date"),
            domain_model.start_date, domain_model.end_date),
        sql.and_(
            domain_model.start_date <= sql.bindparam("end_date"),
            domain_model.end_date == None),
    )
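The sitting_filter, session_filter, and group_membership_filter functions above all encode the same interval-overlap test: two date ranges overlap when an endpoint of one falls inside the other, with a final clause admitting open-ended rows whose end_date is still NULL. A minimal self-contained sketch of that predicate, against a hypothetical event table chosen only for illustration:

from sqlalchemy import Column, Date, Integer, MetaData, Table, bindparam, sql

metadata = MetaData()
# Hypothetical table, for illustration only.
event = Table("event", metadata,
              Column("id", Integer, primary_key=True),
              Column("start_date", Date),
              Column("end_date", Date))

overlap = sql.or_(
    # an endpoint of the event falls inside the parameter range ...
    event.c.start_date.between(bindparam("start_date"), bindparam("end_date")),
    event.c.end_date.between(bindparam("start_date"), bindparam("end_date")),
    # ... or an endpoint of the parameter range falls inside the event ...
    sql.between(bindparam("start_date"), event.c.start_date, event.c.end_date),
    sql.between(bindparam("end_date"), event.c.start_date, event.c.end_date),
    # ... or the event is still open-ended and started before the range ends.
    sql.and_(event.c.start_date <= bindparam("end_date"),
             event.c.end_date == None),
)
print(sql.select([event.c.id]).where(overlap))  # 1.x-style select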
def _filter_one(self, model, field=None, condition=None):
    '''Create a single SQLAlchemy filter condition.'''
    # If no field is present, return the original condition.
    if not field:
        return condition

    # Prepare field and value.
    lower_field, lower_value, lower_value_2 = self._bind_and_lower_value(field)

    # Handle postgresql arrays, if any.
    if isinstance(field.type, postgresql.ARRAY):
        condition = field.any(self.value, operator=opdict[self.operator])
        return condition

    # Handle scalar values: return a SQLAlchemy condition based on the
    # operator value. self.name is the parameter name; lower_field is
    # Table.parameterName.
    if self.operator == '==':
        condition = lower_field.__eq__(lower_value)
    elif self.operator == '<':
        condition = lower_field.__lt__(lower_value)
    elif self.operator == '<=':
        condition = lower_field.__le__(lower_value)
    elif self.operator == '>':
        condition = lower_field.__gt__(lower_value)
    elif self.operator == '>=':
        condition = lower_field.__ge__(lower_value)
    elif self.operator == '!=':
        condition = lower_field.__ne__(lower_value)
    elif self.operator == '=':
        if isinstance(field.type, (sqltypes.TEXT, sqltypes.VARCHAR,
                                   sqltypes.String)):
            # This operator maps to LIKE:
            # x=5  -> x LIKE '%5%' (x contains 5)
            # x=5* -> x LIKE '5%'  (x starts with 5)
            # x=*5 -> x LIKE '%5'  (x ends with 5)
            field = getattr(model, self.name)
            value = self.value
            if value.find('*') >= 0:
                value = value.replace('*', '%')
                condition = field.ilike(bindparam(self.bindname, value))
            else:
                condition = field.ilike(
                    '%' + bindparam(self.bindname, value) + '%')
        else:
            # Not a text column, so use "=" as a straight equals.
            condition = lower_field.__eq__(lower_value)
    elif self.operator == 'between':
        # between condition
        condition = between(lower_field, lower_value, lower_value_2)
    elif self.operator in ['&', '|']:
        # bitwise operations
        condition = lower_field.op(self.operator)(lower_value) > 0
    return condition
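In the 'between' branch above, the module-level between() function and the Column.between() method produce the same BETWEEN clause. A small sketch against a hypothetical table:

from sqlalchemy import Column, Integer, MetaData, Table, between

metadata = MetaData()
# Hypothetical table, for illustration only.
sample = Table("sample", metadata, Column("mag", Integer))

# Both compile to: sample.mag BETWEEN :mag_1 AND :mag_2
print(between(sample.c.mag, 2, 30))
print(sample.c.mag.between(2, 30))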
def query(self):
    items_query = Session().query(self.domain_class).filter(
        self.domain_class.status.in_(self.filter_states)
    )
    if len(self.item_filters):
        for (key, value) in self.item_filters.iteritems():
            column = getattr(self.domain_class, key)
            #!+SCHEDULING(mb, Jan-2011) extend query spec to include sql filters
            if "date" in key:
                if "|" in value:
                    start, end = value.split("|")
                    if start and end:
                        expression = sql.between(column, start, end)
                    elif start:
                        expression = (column >= start)
                    elif end:
                        expression = (column <= end)
                    else:
                        continue
                else:
                    expression = (column == value)
            else:
                expression = (column == value)
            items_query = items_query.filter(expression)
    if self.group_filter:
        if hasattr(self.domain_class, "group_id") and self.group_id:
            items_query = items_query.filter(
                self.domain_class.group_id == self.group_id
            )
    return tuple(items_query)
def get_sittings_between(sittings, start, end):
    # Temporarily extend the query modifier with an interval-overlap test,
    # capture the query, then restore the original modifier.
    modifier = sittings.getQueryModifier()
    sittings.setQueryModifier(
        sql.and_(
            modifier,
            sql.or_(
                sql.between(schema.sitting.c.start_date, start, end),
                sql.between(schema.sitting.c.end_date, start, end),
                sql.between(start,
                    schema.sitting.c.start_date, schema.sitting.c.end_date),
                sql.between(end,
                    schema.sitting.c.start_date, schema.sitting.c.end_date)),
        ))
    query = sittings._query
    sittings.setQueryModifier(modifier)
    return query
def tree_stats(request, treedef, tree, parentid):
    tree_table = datamodel.get_table(tree)
    parentid = None if parentid == 'null' else int(parentid)

    node = getattr(models, tree_table.name)
    descendant = aliased(node)
    node_id = getattr(node, node._id)
    descendant_id = getattr(descendant, node._id)
    treedef_col = tree_table.name + "TreeDefID"

    same_tree_p = getattr(descendant, treedef_col) == int(treedef)
    # Nested-set containment: descendants carry node numbers within the
    # [nodeNumber, highestChildNodeNumber] range of their ancestor.
    is_descendant_p = sql.and_(
        sql.between(descendant.nodeNumber,
                    node.nodeNumber, node.highestChildNodeNumber),
        same_tree_p)

    target, make_joins = getattr(StatsQuerySpecialization, tree)()
    target_id = getattr(target, target._id)

    direct_count = sql.cast(
        sql.func.sum(sql.case(
            [(sql.and_(target_id != None, descendant_id == node_id), 1)],
            else_=0)),
        types.Integer)
    all_count = sql.func.count(target_id)

    with models.session_context() as session:
        query = session.query(node_id, direct_count, all_count) \
            .join(descendant, is_descendant_p) \
            .filter(node.ParentID == parentid) \
            .group_by(node_id)
        query = make_joins(request.specify_collection, query, descendant_id)
        results = list(query)

    return HttpResponse(toJson(results), content_type='application/json')
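tree_stats relies on a nested-set numbering convention: every descendant of a node carries a nodeNumber between the node's own nodeNumber and its highestChildNodeNumber, so subtree membership is a single between() predicate instead of a recursive walk. A minimal sketch of that predicate, with a hypothetical taxon table for illustration:

from sqlalchemy import Column, Integer, MetaData, Table, sql

metadata = MetaData()
# Hypothetical nested-set table, for illustration only.
taxon = Table("taxon", metadata,
              Column("id", Integer, primary_key=True),
              Column("nodeNumber", Integer),
              Column("highestChildNodeNumber", Integer))

node = taxon.alias("node")
descendant = taxon.alias("descendant")

# descendant lies in node's subtree iff its number falls in node's range
is_descendant = sql.between(descendant.c.nodeNumber,
                            node.c.nodeNumber,
                            node.c.highestChildNodeNumber)
print(is_descendant)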
def query(self):
    items_query = Session().query(self.domain_class)
    if not IScheduleText.implementedBy(self.domain_class):
        items_query = items_query.filter(
            self.domain_class.status.in_(self.filter_states))
    if len(self.item_filters):
        for (key, value) in self.item_filters.iteritems():
            column = getattr(self.domain_class, key)
            #!+SCHEDULING(mb, Jan-2011) extend query spec to include sql filters
            if "date" in key:
                if "|" in value:
                    start, end = value.split("|")
                    if start and end:
                        expression = sql.between(column, start, end)
                    elif start:
                        expression = (column >= start)
                    elif end:
                        expression = (column <= end)
                    else:
                        continue
                else:
                    expression = (column == value)
            else:
                expression = (column == value)
            items_query = items_query.filter(expression)
    if self.group_filter and not IScheduleText.implementedBy(
            self.domain_class):
        if hasattr(self.domain_class, "parliament_id") and self.group_id:
            items_query = items_query.filter(
                self.domain_class.parliament_id == self.group_id)
        elif hasattr(self.domain_class, "group_id") and self.group_id:
            items_query = items_query.filter(
                self.domain_class.group_id == self.group_id)
    return tuple(items_query)
def _filterByStartEnd(self, q, params):
    if params.startfoid and params.endfoid:
        q = q.filter(between(Order.order_id, params.startfoid, params.endfoid))
    elif params.startfoid:
        q = q.filter(Order.order_id > params.startfoid)
    elif params.endfoid:
        q = q.filter(Order.order_id < params.endfoid)
    return q
def filter_one(self, DataModelClass, field=None, condition=None):
    """Return the condition as a SQLAlchemy query condition."""
    if not isinstance(field, type(None)):
        # Prepare field and value.
        lower_field, lower_value, lower_value_2 = self.bindAndLowerValue(field)
        # Handle arrays.
        if isinstance(field.type, postgresql.ARRAY):
            condition = field.any(self.value, operator=opdict[self.op])
        else:
            # Normal scalar case: return a SQLAlchemy condition based on the
            # operator value. self.name is the parameter name; lower_field is
            # Table.parameterName.
            if self.op == '==':
                condition = lower_field.__eq__(lower_value)
            elif self.op == '<':
                condition = lower_field.__lt__(lower_value)
            elif self.op == '<=':
                condition = lower_field.__le__(lower_value)
            elif self.op == '>':
                condition = lower_field.__gt__(lower_value)
            elif self.op == '>=':
                condition = lower_field.__ge__(lower_value)
            elif self.op == '!=':
                condition = lower_field.__ne__(lower_value)
            elif self.op == '=':
                if isinstance(field.type, (sqltypes.TEXT, sqltypes.VARCHAR,
                                           sqltypes.String)):
                    # This operator maps to LIKE:
                    # x=5  -> x LIKE '%5%' (x contains 5)
                    # x=5* -> x LIKE '5%'  (x starts with 5)
                    # x=*5 -> x LIKE '%5'  (x ends with 5)
                    field = getattr(DataModelClass, self.name)
                    value = self.value
                    if value.find('*') >= 0:
                        value = value.replace('*', '%')
                        condition = field.ilike(bindparam(self.bindname, value))
                    else:
                        condition = field.ilike(
                            '%' + bindparam(self.bindname, value) + '%')
                else:
                    # Not a text column, so use "=" as a straight equals.
                    condition = lower_field.__eq__(lower_value)
            elif self.op == 'between':
                # between condition
                condition = between(lower_field, lower_value, lower_value_2)
            elif self.op in ['&', '|']:
                # bitwise operations
                condition = lower_field.op(self.op)(lower_value) > 0
    return condition
def delete_subtree(self):
    logging.debug("Deleting subtree %s" % self)
    # Because the children relationship is dynamic, we have to manually sort
    # the deletes to satisfy fk constraints.
    for block in Ipblock.query\
            .filter(Ipblock.version == self.version)\
            .filter(Ipblock.layer3domain_id == self.layer3domain_id)\
            .filter(Ipblock.prefix >= self.prefix)\
            .filter(between(Ipblock.address,
                            self.ip.network.address,
                            self.ip.broadcast.address))\
            .order_by(Ipblock.prefix.desc()).all():
        db.session.delete(block)
def find(self, config):
    hits = []
    for t in config['targets']:
        latitude = float(t['center'][0])
        longitude = float(t['center'][1])
        radius = float(t['radius'])
        locations = session.query(Location.id).\
            filter(between(Location.latitude,
                           latitude - radius, latitude + radius)).\
            filter(between(Location.longitude,
                           longitude - radius, longitude + radius)).all()
        for l in locations:
            if l[0] not in hits:
                hits.append(l[0])
    matches = session.query(ra.id).filter(ra.location_id.in_(hits)).all()
    mismatches = session.query(ra.id).filter(~ra.location_id.in_(hits)).all()
    return matches, mismatches
def update_mcps(session, min_mag=2, max_mag=30):
    mcps = MCPS.__table__
    # For each photometric band, null out the magnitude and its error wherever
    # the magnitude falls outside the [min_mag, max_mag] range.
    for band in ('u', 'b', 'v', 'i', 'j', 'h', 'k'):
        session.execute(
            update(mcps).
            where(sql.not_(sql.between(mcps.c[band], min_mag, max_mag))).
            values(**{band: None, band + '_err': None}))
    session.commit()
def kosher_request(request):
    url = request.args.get('p', app.config['WARNING_URL'])

    # check blacklist
    if request.remote_addr in app.config['USER_BLACKLIST']:
        return False

    # check if allowed site
    if not any([s in url for s in app.config['ALLOWED_SITES']]):
        return False

    # Time-sensitive checking per IP. (Named `logs` rather than `set` so the
    # builtin is not shadowed.)
    logs = AccessLog.query.filter_by(origin=request.remote_addr)
    now = datetime.now()
    ago_minute = now - timedelta(minutes=1)
    ago_hour = now - timedelta(hours=1)
    ago_day = now - timedelta(days=1)
    past_minute = logs.filter(between(AccessLog.when, ago_minute, now)).count()
    past_hour = logs.filter(between(AccessLog.when, ago_hour, now)).count()
    past_day = logs.filter(between(AccessLog.when, ago_day, now)).count()

    if past_minute > app.config['SONGS_PER_MINUTE']:
        return False
    if past_hour > app.config['SONGS_PER_HOUR']:
        return False
    if past_day > app.config['SONGS_PER_DAY']:
        return False
    return True
def batch_serialize(type_key="*", start_date=None, end_date=None):
    """Serialize all objects of `type_key`, or of all types if called with
    the wildcard (*) type key. The item set may be filtered by a status date
    (start_date and/or end_date) range.
    """
    # keep count of serialized objects for feedback
    serialized_count = 0
    # list of domain classes to be serialized
    domain_models = []
    if type_key == "*":
        types_vocab = get_vocabulary("serializable_type")
        # we add the legislature and the chamber first
        for term in types_vocab(None):
            if term.value in ("legislature", "chamber"):
                info = capi.get_type_info(term.value)
                domain_models.append(info.domain_model)
        # we add the rest now
        for term in types_vocab(None):
            if term.value == "*":
                continue
            if term.value not in ("legislature", "chamber"):
                info = capi.get_type_info(term.value)
                domain_models.append(info.domain_model)
    else:
        info = capi.get_type_info(type_key)
        if info.workflow:
            domain_models.append(info.domain_model)
    session = Session()
    for domain_model in domain_models:
        query = session.query(domain_model)
        if IWorkflowed.implementedBy(domain_model) and (start_date or end_date):
            column = domain_model.status_date
            if start_date and end_date:
                expression = sql.between(column, start_date, end_date)
            elif start_date:
                expression = (column >= start_date)
            elif end_date:
                expression = (column <= end_date)
            query = query.filter(expression)
        objects = query.all()
        # !+FILTER(ah, 2014-09-19) adding a filter here - sometimes there is a
        # mismatch between the count shown on the screen, i.e. X items sent
        # for serialization, and only X-n items appearing in the queue - there
        # seem to be empty objects returned sometimes, so eliminate those
        objects = filter(None, objects)
        map(queue_object_serialization, objects)
        log.error(" COUNTING_TYPES_SERIALIZED -- %s COUNT -- %s",
                  domain_model, len(objects))
        serialized_count += len(objects)
    return serialized_count
def search_ekg(self):
    s = model.Session()
    from_date = model.DateUtil.parseDate(request.params.get('from_date'), True)
    to_date = model.DateUtil.parseDate(request.params.get('to_date'), True)
    from_date = model.DateUtil.startOfTheDay(from_date)
    to_date = model.DateUtil.endOfTheDay(to_date)
    data = s.query(model.XrayEKG).\
        filter(between(model.xray_ekg_tb.c.ekg_date, from_date, to_date)).\
        filter_by(deleted=False).\
        order_by(model.XrayEKG.ekg_date.desc()).all()
    results = []
    for d in data:
        results.append({
            'id': d.id,
            'ekg_date': model.DateUtil.toShortFormatDate(d.ekg_date),
            'ekg_no': d.ekg_no,
            'result': d.result,
            'doctor_send': d.doctor_send,
            'is_done': d.is_done,
            'done_date': model.DateUtil.toShortFormatDate(d.done_date),
            'hn': d.patient.hn,
            'an': d.visit.an if d.visit.an else '',
            'name': d.patient._get_name(),
            'sex': u'ชาย' if d.patient.pa_sex == '1' else u'หญิง',
            'age': model.DateUtil.calculateAge(d.patient.pa_birthdate),
            'privilege': d.visit.privilege.name,
            'building': d.visit.admit_main.building.building_name
                        if d.visit.admit_main else '',
            'remark': d.remark
        })
    response.headers['content-type'] = 'text/plain'
    return simplejson.dumps({'data': results})
def query(self):
    items_query = Session().query(self.domain_class)
    if not IScheduleText.implementedBy(self.domain_class):
        items_query = items_query.filter(
            self.domain_class.status.in_(self.filter_states)
        )
    if len(self.item_filters):
        for (key, value) in self.item_filters.iteritems():
            column = getattr(self.domain_class, key)
            #!+SCHEDULING(mb, Jan-2011) extend query spec to include sql filters
            if "date" in key:
                if "|" in value:
                    start, end = value.split("|")
                    if start and end:
                        expression = sql.between(column, start, end)
                    elif start:
                        expression = (column >= start)
                    elif end:
                        expression = (column <= end)
                    else:
                        continue
                else:
                    expression = (column == value)
            else:
                expression = (column == value)
            items_query = items_query.filter(expression)
    if self.group_filter and not IScheduleText.implementedBy(self.domain_class):
        if hasattr(self.domain_class, "chamber_id") and self.group_id:
            # filter by the current chamber
            #!+(SCHEDULING, Oct-2013) Todo: rework to get group documents
            items_query = items_query.filter(
                self.domain_class.chamber_id ==
                get_chamber_for_context(self.context).group_id
            )
        elif hasattr(self.domain_class, "group_id") and self.group_id:
            items_query = items_query.filter(
                self.domain_class.group_id == self.group_id
            )
    elif self.group_filter and IScheduleText.implementedBy(self.domain_class):
        if hasattr(self.domain_class, "group_id") and self.group_id:
            items_query = items_query.filter(
                self.domain_class.group_id == self.group_id
            )
    return tuple(items_query)
def messages_between_dates(self, from_date, to_date, message_type=None,
                           add_pkcs7_data=False):
    """Generator yielding messages with dmDeliveryTime between certain dates."""
    query = self.session.query(Message, SupplementaryMessageData).\
        filter(Message.dmID == SupplementaryMessageData.message_id)
    if message_type:
        query = query.filter(
            SupplementaryMessageData.message_type == message_type)
    if to_date and not from_date:
        ms = query.filter(Message.dmDeliveryTime < to_date)
    elif from_date and not to_date:
        ms = query.filter(Message.dmDeliveryTime > from_date)
    elif from_date and to_date:
        ms = query.filter(between(Message.dmDeliveryTime, from_date, to_date))
    else:
        ms = query
    for m, supp in ms:
        m.read_locally = supp.read_locally
        m.message_type = supp.message_type
        if add_pkcs7_data:
            self.add_pkcs7_data(m)
        yield m
def find(self, config):
    minimum = config['min']
    maximum = config['max']
    matches = session.query(ra.id).\
        filter(between(ra.rooms, minimum, maximum)).\
        order_by(ra.posted.desc()).all()
    mismatches = session.query(ra.id).\
        filter(~between(ra.rooms, minimum, maximum)).\
        order_by(ra.posted.desc()).all()
    return matches, mismatches
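Both update_mcps above (via sql.not_) and the mismatches query here (via the ~ operator) negate a between() clause; the two spellings are equivalent. A small sketch with a hypothetical listing table:

from sqlalchemy import Column, Integer, MetaData, Table, between, not_

metadata = MetaData()
# Hypothetical table, for illustration only.
listing = Table("listing", metadata, Column("rooms", Integer))

# Both render as a negated BETWEEN (NOT BETWEEN on recent SQLAlchemy versions).
print(~between(listing.c.rooms, 1, 3))
print(not_(between(listing.c.rooms, 1, 3)))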
def common_path(request, db, where):
    dd = db.metadata.tables["device_data"]
    devices = db.metadata.tables["devices"]
    legs = db.metadata.tables["leg_modes"]
    users = db.metadata.tables["users"]

    # get data for specified date, or last 12h if unspecified
    date = request.args.get("date")

    # passed on to simplify_geometry
    maxpts = int(request.args.get("maxpts") or 0)
    mindist = int(request.args.get("mindist") or 0)

    # Exclude given comma-separated modes from the processed path; stops by
    # default. A blank argument removes excludes.
    exarg = request.args.get("exclude")
    exclude = True if exarg == "" else not_(
        legs.c.mode.in_((exarg or "STILL").split(",")))

    if date:
        start = datetime.strptime(date, '%Y-%m-%d').replace(
            hour=0, minute=0, second=0, microsecond=0)
    else:
        start = datetime.now() - timedelta(hours=12)
    end = start + timedelta(hours=24)

    # in the export link case, we get a date range
    firstday = request.args.get("firstday")
    firstday = firstday and datetime.strptime(firstday, '%Y-%m-%d')
    firstday = firstday or datetime.now()
    lastday = request.args.get("lastday")
    lastday = lastday and datetime.strptime(lastday, '%Y-%m-%d')
    lastday = lastday or firstday
    date_start = firstday.replace(hour=0, minute=0, second=0, microsecond=0)
    date_end = lastday.replace(hour=0, minute=0, second=0, microsecond=0) \
        + timedelta(hours=24)
    if request.args.get("firstday") or request.args.get("lastday"):
        start, end = date_start, date_end

    # find end of user legs
    legsend = select(
        [func.max(legs.c.time_end).label("time_end")],
        where,
        devices.join(users).join(legs)).alias("legsend")

    # use user legs if available
    legsed = select(
        [func.ST_AsGeoJSON(dd.c.coordinate).label("geojson"),
         cast(legs.c.mode, String).label("activity"),
         legs.c.line_name,
         legs.c.time_start.label("legstart"),
         cast(legs.c.time_start, String).label("time_start"),
         cast(legs.c.time_end, String).label("time_end"),
         legs.c.id,
         dd.c.time],
        and_(
            where,
            legs.c.activity != None,
            exclude,
            dd.c.time >= start,
            dd.c.time < end),
        devices \
            .join(users) \
            .join(legs) \
            .join(dd, and_(
                legs.c.device_id == dd.c.device_id,
                between(dd.c.time, legs.c.time_start, legs.c.time_end))))

    # fall back on raw trace beyond end of user legs
    unlegsed = select(
        [func.ST_AsGeoJSON(dd.c.coordinate).label("geojson"),
         cast(dd.c.activity_1, String).label("activity"),
         literal(None).label("line_name"),
         literal(None).label("legstart"),
         literal(None).label("time_start"),
         literal(None).label("time_end"),
         literal(None).label("id"),
         dd.c.time],
        and_(
            where,
            dd.c.time >= start,
            dd.c.time < end,
            or_(legsend.c.time_end.is_(None), dd.c.time > legsend.c.time_end)),
        dd.join(devices).join(legsend, literal(True)))

    # Sort also by leg start time so join point repeats adjacent to correct leg
    query = legsed.union_all(unlegsed).order_by(text("time, legstart"))

    query = query.limit(35000)  # sanity limit vs date range

    points = db.engine.execute(query)

    # re-split into legs, and the raw part
    segments = (
        legpts for (legid, legpts) in dict_groups(points, ["legstart"]))

    features = []
    for points in segments:
        # discard the less credible location points
        points = trace_discard_sidesteps(points, BAD_LOCATION_RADIUS)

        # simplify the path geometry by dropping redundant points
        points = simplify_geometry(
            points, maxpts=maxpts, mindist=mindist, keep_activity=True)

        features += trace_linestrings(points, (
            'id', 'activity', 'line_name', 'time_start', 'time_end'))

    return jsonify({'type': 'FeatureCollection', 'features': features})
def run(db_type, loc, returned_columns, host=None, port=None, username=None,
        password=None, constraints=None, row_limit=None, sql=False,
        distinct=True, commandline=False):
    db = get_connection(db_type, loc, host=host, port=port,
                        username=username, password=password)
    elm_tables = build_elm_schema(db)
    elm_constraints = constraints

    logging.info('Pulling table and column Metadata from database')
    if not len(db._metadata.tables):
        db._metadata.reflect()
    sqa_tables = db._metadata.tables.values()

    logging.info('Loading all tables and columns objects using Metadata')
    for sqa_table in sqa_tables:
        elm_tables.append(elm_table(sqa_table))

    logging.info('Building a list of necessary tables')
    ret_cols = []
    for tab_col in returned_columns:
        table = tab_col[0]
        column = tab_col[1]
        ret_cols.append([table, column])
    tabs_temp = set([table_column[0] for table_column in ret_cols])

    # Generate a set of tables necessary for all the constraints
    con_tabs_temp = set()
    for con in elm_constraints:
        if con.bool_type == "" or con.bool_type is None:
            con_tabs_temp.add(con.table_name)
        else:
            for c in con.constraints:
                con_tabs_temp.add(c.table_name)
    tabs_temp = tabs_temp.union(con_tabs_temp)

    # Add the tables from the constraints to the validation
    col_tab_all = ret_cols[:]
    if len(elm_constraints) > 0:
        for con in elm_constraints:
            if con.bool_type == "" or con.bool_type is None:
                col_tab_all.append([con.table_name, con.column_name])
            else:
                for c in con.constraints:
                    col_tab_all.append([c.table_name, c.column_name])

    # Validate that the tables and columns actually exist
    all_tables = set([t.name for t in sqa_tables])
    for tc in col_tab_all:
        ta = str(tc[0])
        co = str(tc[1])
        if ta not in all_tables:
            raise Exception(
                'Table ' + ta + ' doesn\'t exist in the current database')
        else:
            found = False
            for t in [t for t in sqa_tables if t.name == ta]:
                for tcn in [c.name for c in t._columns._all_cols]:
                    if tcn == co:
                        found = True
            if not found:
                raise Exception(
                    'Column %s.%s' % (ta, co) +
                    ' doesn\'t exist in the current database')

    # Find the optimal JOIN path
    joins = join_sequence(list(sqa_tables), list(tabs_temp))

    # Remove JOIN redundancy http://stackoverflow.com/a/480227
    seen = set()
    seen_add = seen.add
    joins = [x for x in joins if x not in seen and not seen_add(x)]

    # Make it easy to find table data via a dictionary
    table_dict = {tab.name: tab.data for tab in elm_tables}

    # Instantiate the query - needs to be session.query(), not a simple
    # select(), so filters can be added later
    query = db.session.query()

    def get_column(sqa_table, column_name):
        for ec in sqa_table._columns._all_cols:
            if ec.name == column_name:
                return ec

    # Build the joins
    # - Needs to be performed before adding columns to prevent session.Query
    #   from generating excess joins
    query = query.select_from(table_dict[joins[0]])
    sqa_joins = table_dict[joins[0]]
    for i in range(1, len(joins)):
        second_table = table_dict[joins[i]]
        try:
            sqa_joins = sqa_joins.join(second_table)
            query = query.join(second_table)
        except AmbiguousForeignKeysError:
            # TODO: Assign a priority for primary keys from the order columns
            # are requested
            first_column = None
            second_column = None
            first_table = table_dict[joins[i - 1]]
            # Look through foreign keys of the second table
            for fk in second_table.foreign_keys:
                # If the key is both a primary key and from the first table
                if fk.column.primary_key and fk.column.table == first_table:
                    # Assign the key found to the first table column, and the
                    # associated foreign key to the second table column
                    first_column = fk.column
                    second_column = fk.parent
            sqa_joins = sqa_joins.join(
                second_table, first_column == second_column)
            query = query.join(second_table, first_column == second_column)

    # Build the columns to select: collect non-aggregate columns, aggregate
    # columns, and the final select list together to preserve order
    sqa_select_cols = []
    sqa_non_aggregate = []
    sqa_aggregate = []
    for tc in returned_columns:
        t, c, a = tc[0], tc[1], tc[2]
        ec = get_column(table_dict[t], c)
        if a == '':
            sqa_non_aggregate.append(ec)
            sqa_select_cols.append(ec)
        else:
            sqa_aggregate.append((a, ec))
            if a == 'COUNT':
                sqa_select_cols.append(func.count(ec))
            elif a == 'SUM':
                sqa_select_cols.append(func.sum(ec))
            elif a == 'AVG':
                sqa_select_cols.append(func.avg(ec))
            elif a == 'MIN':
                sqa_select_cols.append(func.min(ec))
            elif a == 'MAX':
                sqa_select_cols.append(func.max(ec))
    query = query.add_columns(*sqa_select_cols)
    if len(sqa_aggregate) > 0:
        for na in sqa_non_aggregate:
            query = query.group_by(na)

    # Add the constraints; the operator mapping is identical for single and
    # compound constraints, so it lives in one helper
    def constraint_expression(c):
        column = get_column(table_dict[c.table_name], c.column_name)
        if c.operator == "=":
            return column == c.val1
        elif c.operator == ">":
            return column > c.val1
        elif c.operator == ">=":
            return column >= c.val1
        elif c.operator == "<":
            return column < c.val1
        elif c.operator == "<=":
            return column <= c.val1
        elif c.operator in ("!=", "<>"):
            return column != c.val1
        elif c.operator in ("between", "btw"):
            return between(column, c.val1, c.val2)

    for ec in elm_constraints:
        if ec.bool_type == "":
            query = query.filter(constraint_expression(ec))
        else:
            criteria = [constraint_expression(ecn) for ecn in ec.constraints]
            if ec.bool_type == "OR":
                query = query.filter(or_(*criteria))
            elif ec.bool_type == "AND":
                query = query.filter(and_(*criteria))

    # Make rows distinct
    if distinct:
        query = query.distinct()

    # Limit the number of returned rows
    if row_limit:
        query = query.limit(row_limit)

    # Execute the full statement. (sql_text is kept distinct from the `sql`
    # flag parameter, which the original shadowed.)
    # TODO: fix the dialect hack
    dialect = select([get_column(table_dict[returned_columns[0][0]],
                                 returned_columns[0][1])]).bind.dialect
    if commandline:
        if sql:
            sql_text = compile_query_sqlite(query, dialect)
            sql_text = sqlparse.format(sql_text, reindent=True,
                                       keyword_case='upper')
            logging.info('Returned query ' + sql_text)
            print sql_text
        else:
            res = query.all()
            for row in res:
                logging.info('Data row ' + str(row))
                print str(row)
    else:
        if db_type == "sqlite":
            sql_text = compile_query_sqlite(query, dialect)
        elif db_type == "oracle":
            sql_text = compile_query_oracle(query, dialect)
        elif db_type == "mysql":
            sql_text = compile_query_mysql(query, dialect)
        else:
            sql_text = "Unable to generate sql for " + db_type
        sql_text = sqlparse.format(sql_text, reindent=True,
                                   keyword_case='upper')
        logging.info('Returned query ' + sql_text)
        res = query.all()
        return sql_text, res
def getValuesByInterval(conn, query, start_timestamp=None, end_timestamp=None,
                        granularity=None, null_tolerance=0):
    global dbload_min_timestamp

    plots = query.keys()
    types_map = {
        'min': datapoint.c.min,
        'max': datapoint.c.max,
        'avg': datapoint.c.avg,
        'lower_limit': datapoint.c.lower_limit,
        'upper_limit': datapoint.c.upper_limit,
        'warn_lower': datapoint.c.warn_lower,
        'warn_upper': datapoint.c.warn_upper,
        'warn_type': datapoint.c.warn_type,
        'crit_lower': datapoint.c.crit_lower,
        'crit_upper': datapoint.c.crit_upper,
        'crit_type': datapoint.c.crit_type
    }

    types = set()
    for plot_types in query.values():
        types = types.union(plot_types)

    if len(plots) == 0:
        return {}

    if start_timestamp == None:
        start_timestamp = 0
    if end_timestamp == None:
        end_timestamp = time()
    if end_timestamp < start_timestamp:
        start_timestamp, end_timestamp = end_timestamp, start_timestamp

    start_timestamp = max(start_timestamp, dbload_min_timestamp)

    tfs = TimeFrame.getAll(conn)

    if granularity == None:
        now = time()
        for tf in tfs:
            if tf.retention_period != None and \
                    now - tf.retention_period > start_timestamp:
                continue
            if granularity == None or tf.interval < granularity:
                granularity = tf.interval

    granularity = max(granularity, (end_timestamp - start_timestamp) / 125)

    # pick the coarsest timeframe that still satisfies the granularity
    data_tf = None
    for tf in sorted(tfs, cmp=lambda x, y: cmp(x.interval, y.interval),
                     reverse=True):
        if tf.interval < granularity and data_tf != None:
            break
        data_tf = tf
    granularity = data_tf.interval

    start_timestamp -= 1.5 * granularity
    end_timestamp += 1.5 * granularity

    if data_tf.retention_period != None:
        start_timestamp = max(start_timestamp,
                              data_tf.retention_period - 2 * granularity)

    assert granularity > 0

    # properly align interval with the timeframe
    start_timestamp = start_timestamp - start_timestamp % granularity

    hostservices = set([plot.hostservice for plot in plots])

    comment_objs = Comment.getByHostServicesAndInterval(
        conn, hostservices, start_timestamp, end_timestamp)
    comments = []
    for comment_obj in comment_objs:
        if comment_obj.hostservice.parent_hostservice != None:
            parent_service = \
                comment_obj.hostservice.parent_hostservice.service.name
        else:
            parent_service = None
        comments.append({
            'id': comment_obj.id,
            'host': comment_obj.hostservice.host.name,
            'parent_service': parent_service,
            'service': comment_obj.hostservice.service.name,
            'timestamp': comment_obj.timestamp,
            'comment_timestamp': comment_obj.comment_timestamp,
            'author': comment_obj.author,
            'text': comment_obj.text
        })

    statusdata = []

    st = time()
    sql_types = [datapoint.c.plot_id, datapoint.c.timestamp]
    for type in types_map.keys():
        if type in types:
            sql_types.append(types_map[type])
    plot_conds = tuple_(datapoint.c.plot_id).in_([(plot.id,) for plot in plots])
    sel = select(sql_types,
                 and_(datapoint.c.timeframe_id == data_tf.id,
                      plot_conds,
                      between(datapoint.c.timestamp,
                              literal(start_timestamp) -
                              literal(start_timestamp) % data_tf.interval,
                              end_timestamp))) \
        .order_by(datapoint.c.timestamp.asc())
    et = time()
    print "Building SQL query took %f seconds" % (et - st)

    st = time()
    result = conn.execute(sel)
    et = time()
    print "SQL query took %f seconds" % (et - st)

    charts = OrderedDict()
    prev_rows = {}
    for plot in plots:
        chart = {}
        for type in query[plot]:
            chart[type] = []
        charts[plot] = chart
        prev_rows[plot] = None

    print "Result rows: %d" % (result.rowcount)

    st = time()
    for row in result:
        plot = Plot.get(row[datapoint.c.plot_id])
        assert plot != None
        chart = charts[plot]
        prev_row = prev_rows[plot]
        ts = row[datapoint.c.timestamp]
        if prev_row != None and \
                row[datapoint.c.timestamp] - prev_row[datapoint.c.timestamp] > \
                (null_tolerance + 1) * granularity:
            # the gap exceeds the tolerance: insert a null marker halfway
            # between the two rows so the chart shows a break
            ts_null = prev_row[datapoint.c.timestamp] + \
                (row[datapoint.c.timestamp] -
                 prev_row[datapoint.c.timestamp]) / 2
            for type in query[plot]:
                chart[type].append((ts_null, None))
        for type in query[plot]:
            chart[type].append((ts, row[types_map[type]]))
        prev_rows[plot] = row
    et = time()
    print "Processing results took %f seconds" % (et - st)

    return {
        'comments': comments,
        'charts': charts,
        'statusdata': statusdata,
        'start_timestamp': start_timestamp,
        'end_timestamp': end_timestamp,
        'granularity': granularity
    }
def get_active_round(self):
    # A round is active when the current UTC time falls inside its date range.
    return db.session.query(Round).filter(
        between(datetime.datetime.utcnow(),
                Round.start_date, Round.end_date)).first()
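get_active_round passes a plain datetime, not a column, as the first argument to between(); SQLAlchemy coerces the scalar into the tested operand, so the row's two date columns form the range. A minimal sketch with a hypothetical table:

import datetime
from sqlalchemy import Column, DateTime, Integer, MetaData, Table, between

metadata = MetaData()
# Hypothetical table, for illustration only.
rounds = Table("rounds", metadata,
               Column("id", Integer, primary_key=True),
               Column("start_date", DateTime),
               Column("end_date", DateTime))

# Renders as: :param_1 BETWEEN rounds.start_date AND rounds.end_date
print(between(datetime.datetime.utcnow(),
              rounds.c.start_date, rounds.c.end_date))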
def get_topics(self, **kw):
    """Get a number of topics within a date range or by title.

    inputs:
        optional string title;  # topic title
        optional int nums;      # number of topics
        optional date max_time;
        optional date min_time;
    """
    try:
        # fetch one extra row; see: https://github.com/bachbui2/Jminee/issues/7
        if 'nums' not in kw:
            nums = self.MAX_TOPIC_SIZE + 1
        else:
            # TODO: make sure kw['nums'] is an int
            nums = int(kw['nums']) + 1
        user_id = request.identity['user'].user_id
        log.info("User %s get topics %s" % (user_id, str(kw)))

        if 'title' in kw:
            topics = DBSession.query(Topic).\
                join(MemberTopic).\
                filter(Topic.title == kw['title']).\
                filter(MemberTopic.member_id == user_id).\
                order_by(Topic.update_time.desc()).\
                limit(nums).\
                all()
        elif 'max_time' in kw and 'min_time' in kw:
            max_time = kw['max_time']
            min_time = kw['min_time']
            topics = DBSession.query(Topic).\
                join(MemberTopic).\
                filter(sql.between(Topic.time, min_time, max_time)).\
                filter(MemberTopic.member_id == user_id).\
                order_by(Topic.update_time.desc()).\
                limit(nums).\
                all()
        elif 'max_time' in kw:
            max_time = kw['max_time']
            topics = DBSession.query(Topic).\
                join(MemberTopic).\
                filter(Topic.time <= max_time).\
                filter(MemberTopic.member_id == user_id).\
                order_by(Topic.update_time.desc()).\
                limit(nums).\
                all()
        elif 'min_time' in kw:
            min_time = kw['min_time']
            topics = DBSession.query(Topic).\
                join(MemberTopic).\
                filter(Topic.time >= min_time).\
                filter(MemberTopic.member_id == user_id).\
                order_by(Topic.update_time.desc()).\
                limit(nums).\
                all()
        else:
            topics = DBSession.query(Topic).\
                join(MemberTopic).\
                filter(MemberTopic.member_id == user_id).\
                order_by(Topic.update_time.desc()).\
                limit(nums).\
                all()

        # The extra row only signals that more topics remain.
        more = False
        if len(topics) == nums:
            topics = topics[:nums - 1]
            more = True

        for topic in topics:
            new_msg_cnt = DBSession.query(MemberSubject).\
                join(Subject).\
                filter(MemberSubject.member_id == user_id).\
                filter(Subject.topic_id == topic.uid).count()
            topic.new_msg = new_msg_cnt
            log.info(new_msg_cnt)

        return dict(success=True, topics=topics, more=more)
    except Exception as e:
        log.exception('Got exception')
        return dict(success=False)
def report_data_updates(args):
    """Count updates within the time frame.

    Provide a summary of:
    * number of modifications to each file type
    * number of cruises with updated files

    """
    with closing(lsession()) as session:
        date_end = args.date_end
        date_start = args.date_start
        args.output.write('/'.join(map(str, [date_start, date_end])) + '\n')

        docs = session.query(Document).\
            filter(between(Document.LastModified, date_start, date_end)).\
            filter(not_(Document.FileType.in_(types_to_ignore))).\
            all()

        # count modifications of file types
        type_edit_cruises = {}
        type_add_cruises = {}
        cruises = set()
        drange = DateRange(date_start, date_end)
        for doc in docs:
            if 'original' in doc.FileName or 'Queue' in doc.FileName:
                continue
            details = [doc.LastModified, doc.ExpoCode, doc.FileName]
            log.info(' '.join(map(str, details)))
            if not doc.Modified or len(doc.Modified.split(',')) == 1:
                try:
                    type_add_cruises[doc.FileType].append(doc.ExpoCode)
                except KeyError:
                    type_add_cruises[doc.FileType] = [doc.ExpoCode]
            else:
                for mtime in doc.Modified.split(','):
                    mtime = datetime.strptime(mtime, '%Y-%m-%d %H:%M:%S')
                    if mtime in drange:
                        log.info('\t{0}\n'.format(mtime))
                        cruises.add(doc.ExpoCode)
                        try:
                            type_edit_cruises[doc.FileType].append(doc.ExpoCode)
                        except KeyError:
                            type_edit_cruises[doc.FileType] = [doc.ExpoCode]
                    else:
                        log.info('\t{0} out of range\n'.format(mtime))

        args.output.write(
            'Data updates from {0}/{1}:\n'.format(date_start, date_end))
        args.output.write('# cruises supported: {0}\n'.format(
            session.query(distinct(Cruise.ExpoCode)).count()))
        args.output.write('# cruises with data: {0}\n'.format(
            session.query(distinct(Document.ExpoCode)).count()))
        args.output.write('# cruises with updated files: {0}\n'.format(
            len(cruises)))

        # The loop variable is named expocodes, not cruises, so the set above
        # is not clobbered before it is written out below.
        type_add_counts = {}
        for ftype, expocodes in type_add_cruises.items():
            type_add_counts[ftype] = len(expocodes)
        args.output.write('# files added: {0}\n'.format(
            sum(type_add_counts.values())))
        type_edit_counts = {}
        for ftype, expocodes in type_edit_cruises.items():
            type_edit_counts[ftype] = len(expocodes)
        args.output.write('# file updates: {0}\n'.format(
            sum(type_edit_counts.values())))
        args.output.write('File type add counts:\n')
        args.output.write(repr(type_add_counts) + '\n')
        args.output.write('File type edit counts:\n')
        args.output.write(repr(type_edit_counts) + '\n')
        args.output.write('Cruises with updated files:\n')
        args.output.write(repr(sorted(list(cruises))) + '\n')

        cruise_type_adds = {}
        for ftype, expocodes in type_add_cruises.items():
            for cruise in expocodes:
                try:
                    cruise_type_adds[cruise].add(ftype)
                except KeyError:
                    cruise_type_adds[cruise] = set([ftype])
        cruise_type_updates = {}
        for ftype, expocodes in type_edit_cruises.items():
            for cruise in expocodes:
                try:
                    cruise_type_updates[cruise].add(ftype)
                except KeyError:
                    cruise_type_updates[cruise] = set([ftype])

        args.output.write('Cruise file adds:\n')
        for cruise, types in cruise_type_adds.items():
            args.output.write('{0},{1}\n'.format(cruise, types))
        args.output.write('Cruise file updates:\n')
        for cruise, types in cruise_type_updates.items():
            args.output.write('{0},{1}\n'.format(cruise, types))
        args.output.write('Cruises with CTD adds:\n')
        for cruise, types in cruise_type_adds.items():
            if not any('CTD' in ttt for ttt in types):
                continue
            args.output.write('{0}\n'.format(cruise))
        args.output.write('Cruises with CTD updates:\n')
        for cruise, types in cruise_type_updates.items():
            if not any('CTD' in ttt for ttt in types):
                continue
            args.output.write('{0}\n'.format(cruise))