Example #1
0
    def compile(self):
        ''' Compile the text files to DDStorm modules.

        Gathers every ``*.txt`` file found under the configured
        ``library_path`` and ``custom_path``, makes sure the ``module_path``
        directory exists and contains no ``*.module`` files, and then hands
        the library files followed by the custom files to
        ``self._makeModule`` in the order produced by ``self._sortPriority``.
        '''
        self.source = set()
        self.custom = set()
        self.alias = Alias(self._conf)

        # Loop over library files and add *.txt files to source
        for path, subdirs, files in os.walk(self._conf.get("library_path")):
            for name in files:
                if fnmatch(name, "*.txt"):
                    self.source.add(os.path.join(path, name))

        # Loop over custom files and add *.txt files to custom
        for path, subdirs, files in os.walk(self._conf.get("custom_path")):
            for name in files:
                if fnmatch(name, "*.txt"):
                    self.custom.add(os.path.join(path, name))

        # Create module directory if not already present and delete all module files
        module_path = self._conf.get("module_path")
        if not os.path.isdir(module_path):
            os.makedirs(module_path)
        for f in os.listdir(module_path):
            if fnmatch(f, "*.module"):
                # Join rather than concatenate so the file is found even when
                # module_path is configured without a trailing separator.
                os.unlink(os.path.join(module_path, f))

        # Create a regex for calculating priority from filename.
        # Raw string: the backslashes belong to the regex, not to Python.
        self.priorityRegex = re.compile(r"(?<=\.)\d+$")

        # First sort files by priority then compile them to module
        for src in self._sortPriority(self.source):
            self._makeModule(src)
        for src in self._sortPriority(self.custom):
            self._makeModule(src)
Example #2
0
def main (args, app):
  """Remove the first configured alias whose name equals ``args.name``.

  On a match the alias removes itself from ``app.config``, the config is
  saved and a confirmation is printed.  Nothing happens when no alias
  matches.
  """
  hit = next(
    (entry for entry in Alias.FromConfig(app.config) if args.name == entry.name),
    None)
  if hit is not None:
    hit.remove(app.config)
    app.config.save( )
    print ('removed', hit.format_url( ))
Example #3
0
    def compile(self):
        ''' Compile the text files to DDStorm modules.

        Steps, in order:
        1. collect ``*.txt`` files below ``library_path`` and ``custom_path``;
        2. create ``module_path`` if missing and remove its ``*.module`` files;
        3. build modules from the library files, then from the custom files,
           each group ordered by ``self._sortPriority``.
        '''
        self.source = set()
        self.custom = set()
        self.alias = Alias(self._conf)

        # Loop over library files and add *.txt files to source
        for path, subdirs, files in os.walk(self._conf.get("library_path")):
            for name in files:
                if fnmatch(name, "*.txt"):
                    self.source.add(os.path.join(path, name))

        # Loop over custom files and add *.txt files to custom
        for path, subdirs, files in os.walk(self._conf.get("custom_path")):
            for name in files:
                if fnmatch(name, "*.txt"):
                    self.custom.add(os.path.join(path, name))

        # Create module directory if not already present and delete all module files
        module_dir = self._conf.get("module_path")
        if not os.path.isdir(module_dir):
            os.makedirs(module_dir)
        for entry in os.listdir(module_dir):
            if fnmatch(entry, "*.module"):
                # os.path.join works with or without a trailing separator on
                # the configured path; plain "+" only worked with one.
                os.unlink(os.path.join(module_dir, entry))

        # Create a regex for calculating priority from filename
        # (raw string so "\." and "\d" are not treated as string escapes)
        self.priorityRegex = re.compile(r"(?<=\.)\d+$")

        # First sort files by priority then compile them to module
        for src in self._sortPriority(self.source):
            self._makeModule(src)
        for src in self._sortPriority(self.custom):
            self._makeModule(src)
Example #4
0
	def recv_captcha(self, mailfrom, msg):
		""" Receives and verifies captcha.

		The answer is the stripped first line of the message body (of the
		first part when the message is multipart).  It is hashed together
		with the sender address to look up the expected captcha word.  On a
		match, an approval request is mailed to the owner of the alias and
		the captcha is stored as CAPTCHA_APPROVED.  A message whose answer
		cannot be extracted is dropped silently.

		mailfrom -- envelope sender, wrapped in Address() to get the address
		msg      -- incoming message; read via ['Subject'], is_multipart()
		            and get_payload()

		Always returns None.
		"""
		subject = msg['Subject']
		# str.split replaces string.split(), which only exists on Python 2.
		orgidentifier = subject.split(' ')[-1]
		logging.debug('Orig CAPTCHA identifier\t: %s', orgidentifier)

		# TODO: Reject if original identifier is not in DB

		try:
			if msg.is_multipart():
				answer = msg.get_payload(0).get_payload().splitlines()[0].strip()
			else:
				answer = msg.get_payload().splitlines()[0].strip()
		except Exception:
			# No usable first line (empty or missing body): nothing to verify.
			return

		identifier = self.db.hash_data(Address(mailfrom).address, answer)
		match = self.db.get_captcha_word(identifier)
		if match is not None and match['word'] == answer:
			# Update captcha status to CAPTCHA_APPROVED
			cid, rid, _word = match

			adata = self.db.get_alias_data(rid)
			aobj = Alias(**adata)
			user = User(**self.db.get_user(uid=aobj.get_uid()))

			# send message to recipient's alias requesting mailfrom's permission to send
			request = UserMessage('senderverify.senduserreq',
					      fromaddx=self.cfg.SVCALIAS,
					      aliasaddx=aobj.get_alias_address(),
					      useraddx=user.get_account_address(),
					      requestor=mailfrom)

			request.generate_message_id(self.cfg.DOMAIN)

			self.db.set_captcha(request['Message-ID'], '', cid, rid, self.db.CAPTCHA_APPROVED)

			logging.debug('Sending approval request to user %s', user.get_username())
			self.send(request['From'], [user.get_forwarding_address()], request)

			# Delete identifier from database
			#self.db.delete_captcha_identifier(identifier)

		else:
			# TOFIX: should replace with new captcha and increment numtries;
			pass
 def _build_alias(self):
     """Write the rendered alias map to ``_alias.py``.

     The template named by ``VARIABLE_TEMPLATE`` is loaded from
     ``TEMPLATES_PATH`` and rendered with ``varName='g_aliasMap'`` and the
     repr of the alias map as ``value``.  Returns a VariablesLengthHelper
     built from the same map.
     """
     loader = FileSystemLoader(self.TEMPLATES_PATH)
     env = Environment(loader=loader)
     template = env.get_template(self.VARIABLE_TEMPLATE)
     alias_map = Alias(self._base_path).get_map()
     with open('_alias.py', 'wb') as out:
         context = {'varName': 'g_aliasMap', 'value': repr(alias_map)}
         rendered = template.render(context)
         out.write(rendered)
     return VariablesLengthHelper(alias_map)
Example #6
0
 def parseMSOffice2011Plist(self, mru_file):
     """Collect the parsed "File Alias" entries of an Office 2011 MRU plist.

     The MSWD, XCEL and PPT3 "File MRU" lists are read in that order.
     Parsing is best effort: a list that is missing, or that fails part-way,
     contributes whatever was parsed before the failure and the next list
     is still processed.

     mru_file -- handed unchanged to ``self.load_bplist``

     Returns a list of parsed aliases; empty when the plist could not be
     loaded (``load_bplist`` returned None).
     """
     plist = self.load_bplist(mru_file)
     if plist is None:
         return []
     aliases = []
     # Backslashes are written escaped; the key values are unchanged.
     mru_keys = (
         "14\\File MRU\\MSWD",
         "14\\File MRU\\XCEL",
         "14\\File MRU\\PPT3",
     )
     for key in mru_keys:
         try:
             for item in plist[key]:
                 aliases.append(Alias(data=item["File Alias"]).parse())
         except Exception:
             # Missing key or unparsable entry: keep what we have, move on.
             continue
     return aliases
Example #7
0
 def parseSidebarlistsPlist(self, mru_file):
     """Collect the parsed 'Alias' entries of a sidebar-lists plist.

     Walks the 'VolumesList' of the "systemitems" section and then of the
     "favorites" section.  Each entry is parsed on its own, so one bad
     entry does not stop the rest; a missing section is skipped.

     mru_file -- handed unchanged to ``self.load_bplist``

     Returns a list of parsed aliases; empty when the plist could not be
     loaded (``load_bplist`` returned None).
     """
     plist = self.load_bplist(mru_file)
     if plist is None:
         return []
     aliases = []
     for section in ("systemitems", "favorites"):
         try:
             # Each section reads its OWN entries.  The favorites pass used to
             # index back into plist["systemitems"], so favorites were never
             # actually parsed.
             for item in plist[section]['VolumesList']:
                 try:
                     aliases.append(Alias(data=item['Alias']).parse())
                 except Exception:
                     # Entry without usable alias data: skip just this one.
                     pass
         except Exception:
             # Section missing or not iterable: nothing to collect from it.
             pass
     return aliases
Example #8
0
 def parseRecentItemsPlist(self, mru_file):
     """Collect parsed bookmarks and aliases from a recent-items plist.

     Bookmarks come from the "CustomListItems" of RecentApplications,
     RecentDocuments and RecentServers; aliases from the "CustomListItems"
     of Applications, Documents and Servers.  Parsing is best effort: a
     section that is missing, or that fails part-way, contributes whatever
     was parsed before the failure and the next section is still processed.

     mru_file -- handed unchanged to ``self.load_bplist``

     Returns ``(bookmarks, aliases)``.
     NOTE(review): when the plist cannot be loaded a bare ``[]`` is returned
     instead of a pair; kept as-is because callers may test for it.
     """
     plist = self.load_bplist(mru_file)
     if plist is None:
         return []
     bookmarks = []
     aliases = []

     for section in ("RecentApplications", "RecentDocuments", "RecentServers"):
         try:
             for item in plist[section]["CustomListItems"]:
                 bookmarks.append(Bookmark(data=item["Bookmark"]).parse())
         except Exception:
             # Section absent or entry unparsable: move on to the next one.
             pass

     for section in ("Applications", "Documents", "Servers"):
         try:
             for item in plist[section]["CustomListItems"]:
                 aliases.append(Alias(data=item["Alias"]).parse())
         except Exception:
             pass

     return bookmarks, aliases
Example #9
0
class Profile(namedtuple('Profile', 'v e iv ie sim')):
    """Named tuple with the fields ``v``, ``e``, ``iv``, ``ie`` and ``sim``.

    The class doubles as a factory: ``build`` looks up a builder in
    ``_profiles`` by the node's ``relOp`` value and returns whatever that
    builder produces.
    """

    # Shared Alias instance constructed with ``set``; _antialias calls it once
    # per column.  NOTE(review): presumably maps a column to the set of names
    # it stands for -- confirm against the Alias definition.
    _aliases = Alias(set)

    # Dispatch table: value of a node's 'relOp' -> callable taking
    # (node, inputs), as used by build() below.
    _profiles = {
        'JdbcTableScan': tablescan,
        'JdbcProjectRel': projection,
        'JdbcFilterRel': selection,
        'JdbcJoinRel': join,
        'JdbcAggregateRel': aggregate,
        'JdbcToEnumerableConverter': jdbctoenumerate
    }

    @classmethod
    def _antialias(cls, columns):
        """Apply ``_aliases`` to every column and return the flattened results as a set."""
        return set(flat(map(cls._aliases, columns)))

    @classmethod
    def build(cls, node, inputs):
        """Return the result of the builder registered for ``node.get('relOp')``.

        Raises KeyError when the relOp value has no entry in ``_profiles``.
        """
        return cls._profiles[node.get('relOp')](node, inputs)
Example #10
0
 def parseFinderPlist(self, mru_file):
     """Collect parsed bookmarks and aliases from "FXRecentFolders".

     For every entry, the "file-bookmark" data and the "_CFURLAliasData"
     found under "file-data" are parsed independently, so an entry missing
     one of them still contributes the other.

     mru_file -- handed unchanged to ``self.load_bplist``

     Returns ``(bookmarks, aliases)``.
     NOTE(review): when the plist cannot be loaded a bare ``[]`` is returned
     instead of a pair; kept as-is because callers may test for it.
     """
     plist = self.load_bplist(mru_file)
     if plist is None:
         return []
     bookmarks = []
     aliases = []
     try:
         for item in plist["FXRecentFolders"]:
             try:
                 bookmarks.append(Bookmark(data=item["file-bookmark"]).parse())
             except Exception:
                 # No (usable) bookmark in this entry.
                 pass
             try:
                 aliases.append(Alias(data=item["file-data"]["_CFURLAliasData"]).parse())
             except Exception:
                 # No (usable) alias data in this entry.
                 pass
     except Exception:
         # "FXRecentFolders" missing or not iterable: return what we have.
         pass
     return bookmarks, aliases
    uid = row[0]
    login = row[1].strip()
    name = row[2]
    user_type = row[7].strip()
    location = row[4]
    email = row[5]

    unmask[uid] = uid

    m = fakeusr_rex.search(login)
    if m is not None:
        record_type = USR_FAKE
    else:
        record_type = USR_REAL

    a = Alias(record_type, uid, login, name, email, location, user_type)
    aliases[uid] = a

    # - email
    d_uid_email[a.uid] = a.email
    if a.email is not None:
        d_email_uid.setdefault(a.email, set([a.uid]))
        d_email_uid[a.email].add(a.uid)

    # - prefix
    d_uid_prefix[a.uid] = a.email_prefix
    d_uid_comp_prefix[a.uid] = a.email_prefix
    if a.email_prefix is not None:
        if len(a.email_prefix.split('.')) > 1 or len(
                a.email_prefix.split('_')) > 1:
            d_comp_prefix_uid.setdefault(a.email_prefix, set([a.uid]))
Example #12
0
def get_alias_map (conf):
  """Return a dict that maps each alias name found in ``conf`` to its alias.

  When two aliases share a name, the one yielded later wins.
  """
  return {entry.name: entry for entry in Alias.FromConfig(conf)}
Example #13
0
 def add_alias(self, left: str, right: str):
     """Create an Alias named ``"<left>_<right>"``, register it and return it.

     The alias is built from the net looked up for ``right`` followed by the
     net looked up for ``left``, and stored in ``self._instances`` under its
     name.
     """
     key = "{}_{}".format(left, right)
     right_net = self.get_net(right)
     left_net = self.get_net(left)
     created = Alias(key, self, right_net, left_net)
     self._instances[key] = created
     return created
Example #14
0
class Compile:
    '''
    This class creates a compiler for the DDStorm
    that compiles the text files containing list of
    differential diagnosis to simplified modular
    data files usable by the program.
    '''

    def __init__(self, conf=False):
        '''
        The constructor optionally accepts a configuration.
        If none is provided it creates a default configuration.

        Parameters:
        conf - A dictionary containing configuration options
        '''
        if conf:
            self._conf = conf
        else:
            self._conf = Conf()
        # Turns False as soon as a source line fails validation
        self.clean = True

    def compile(self):
        ''' Compile the text files to DDStorm modules.

        Collects ``*.txt`` files below ``library_path`` and ``custom_path``,
        empties ``module_path`` of ``*.module`` files (creating the
        directory if needed) and builds modules from the library files
        first, then from the custom files.
        '''
        self.source = set()
        self.custom = set()
        self.alias = Alias(self._conf)

        # Loop over library files and add *.txt files to source
        for path, subdirs, files in os.walk(self._conf.get("library_path")):
            for name in files:
                if fnmatch(name, "*.txt"):
                    self.source.add(os.path.join(path, name))

        # Loop over custom files and add *.txt files to custom
        for path, subdirs, files in os.walk(self._conf.get("custom_path")):
            for name in files:
                if fnmatch(name, "*.txt"):
                    self.custom.add(os.path.join(path, name))

        # Create module directory if not already present and delete all module files
        module_path = self._conf.get("module_path")
        if not os.path.isdir(module_path):
            os.makedirs(module_path)
        for f in os.listdir(module_path):
            if fnmatch(f, "*.module"):
                # Joined, not concatenated: works with or without a trailing
                # separator on the configured path.
                os.unlink(os.path.join(module_path, f))

        # Create a regex for calculating priority from filename
        # (raw string so the regex escapes are not read as string escapes)
        self.priorityRegex = re.compile(r"(?<=\.)\d+$")

        # First sort files by priority then compile them to module
        for src in self._sortPriority(self.source):
            self._makeModule(src)
        for src in self._sortPriority(self.custom):
            self._makeModule(src)

    @staticmethod
    def _baseName(addr):
        ''' Lower-cased file name without extension, with "_" and "-" turned into spaces. '''
        stem = os.path.splitext(os.path.basename(addr))[0]
        return stem.lower().replace("_", " ").replace("-", " ")

    @staticmethod
    def _isItem(text):
        ''' True if text is alphanumeric once spaces, "-", "_" and "'" are removed. '''
        return text.replace(" ", "").replace("-", "").replace("_", "").replace("'", "").isalnum()

    def _sortPriority(self, files):
        ''' Sort data files based on their priority settings.

        Returns the file paths ordered by (name, priority, path) in
        descending order: a tuple when there are files, an empty list
        otherwise.  Files without a ".<number>" priority tag get 100.
        '''
        ls = []
        # Loop over the files
        for addr in files:
            # Format the file name
            name = self._baseName(addr)
            # Search for priority tag on file name
            m = self.priorityRegex.search(name)
            # Add to ls as (symptom name, priority number, file name) with default priority of 100
            if m:
                ls.append((name.replace("." + m.group(), ""), int(m.group()), addr))
            else:
                ls.append((name, 100, addr))
        # Sort the file list, first by the symptom name, then by the priority number
        ls.sort(reverse=True)
        if ls:
            return list(zip(*ls))[2]
        return ls

    def _makeModule(self, src):
        ''' Create application usable modules from data files.

        Plain item lines of ``src`` are appended to the module file named
        after the source file (priority tag removed).  Lines starting with
        "+" put the item at the top of the module afterwards; lines
        starting with "-" remove the item from it.  Empty lines, lines
        starting with "!" and anything after "#" are ignored.  Any other
        line is logged as a syntax error and marks the run as not clean.
        '''
        # Format the file name
        module = self._baseName(src)
        # Remove the priority tag from file name
        m = self.priorityRegex.search(module)
        if m:
            module = module.replace("." + m.group(), "")
        # Create the module file name (joined so that a module_path without
        # trailing separator still places the file inside the directory)
        modFile = os.path.join(self._conf.get("module_path"), module + ".module")
        modFlag = False
        # Loop over both files, the source data file and the target module file
        with open(src, "r") as sf, open(modFile, "a") as tf:
            for line in sf:
                # Cut the comment first, then strip, so "item  # note" does
                # not keep trailing blanks.
                line = line.split("#", 1)[0].strip()
                if not line or line.startswith("!"):
                    continue
                if line.startswith(("+", "-")):
                    # Handled in the second pass below
                    modFlag = True
                elif self._isItem(line):
                    print(self.alias.get(line).capitalize(), file=tf)
                else:
                    self.clean = False
                    logging.warning("Syntax error in file '" + src + "': " + line)
        # Deal with special lines
        if modFlag:
            with open(src, "r") as f:
                for line in f:
                    line = line.split("#", 1)[0].strip()
                    if not line.startswith(("+", "-")):
                        continue
                    item = line[1:].strip()
                    if not self._isItem(item):
                        continue
                    entry = self.alias.get(item).capitalize() + "\n"
                    with open(modFile, "r") as fn:
                        text = fn.read()
                    if line.startswith("+"):
                        # If line starts with + add it to the module file
                        text = entry + text
                    else:
                        # If line starts with - remove corresponding item from the module file
                        text = text.replace(entry, "")
                    with open(modFile, "w") as fn:
                        # write(), not print(): print() appended one extra
                        # blank line to the module for every directive.
                        fn.write(text)

    def is_clean(self):
        '''Report if compilation ended successfully'''
        return self.clean
Example #15
0
	def apply_aliasing(self, user, mailfrom, rcpttos, msg):
		""" Applies an alias as the sender of a message or attempts to infer
		it if none was given by the sender.

		Handles cases 1d and 1e of the specification.

		user     -- sending user; queried for uid, salt, account address and
		            forwarding address
		mailfrom -- original sender; only its realname is carried over to
		            the alias address
		rcpttos  -- list of recipients; MODIFIED IN PLACE (the alias address
		            found in the To header is deleted from it)
		msg      -- message; its To/From headers are rewritten in place

		Returns False when no alias could be inferred (an error message is
		mailed to the user first) and None on every other path.
		"""
		usralias = None
		alias_addx = None
		# Predicate for the header search: truthy when the address parses as
		# an alias address.
		is_alias_address = lambda entry: entry.parse_alias_address()

		logging.debug('Attempting to apply aliasing')

		# look for use of existing alias in To field (case 1d);
		for cur_addx in msg.search_header_addresses('to', is_alias_address):
			alias_pair = cur_addx.parse_alias_address()
			alias_data = self.db.get_alias_data(*alias_pair,	\
							    uid=user.get_uid())

			# Parses as an alias but no data came back for this user: try
			# the next address.
			if not alias_data:
				continue

			usralias = Alias(**alias_data)
			alias_addx = cur_addx

			#if not usralias.is_active():
			#	continue

			# remove alias from rcpttos and all To fields
			# (deleting while indexing is safe here only because of the
			# immediate break)
			for i in range(len(rcpttos)):
				if alias_addx == rcpttos[i]:
					del rcpttos[i]
					break

			msg.replace_address('to', alias_addx, None)
			# Only the first usable alias is applied.
			break

		# if no alias in To field, try to infer the correct one (case 1e);
		if not alias_addx:

			logging.debug("Couldn't find alias to use in headers; " \
				      'attempting to infer correct alias')

			alias_data = self.db.infer_alias(user.get_uid(),
							 msg.get_header_addresses('to'),
							 user.get_salt())

			if not alias_data:
				logging.debug('Failed to infer alias')

				err = ErrorMessage('applyalias.noinfer',
						   fromaddx = self.cfg.SVCALIAS,
						   toaddx = user.get_account_address(),
						   subject = msg['Subject'])
	
				self.send(err['From'], [user.get_forwarding_address()], err)

				# The only path that returns False.
				return False

			usralias = Alias(**alias_data)

			#if not usralias.is_active():
			#	return False

			logging.debug('Succesfully inferred alias "%s"', str(usralias))

			alias_addx = Address(usralias.get_alias_address())

		# if we found an alias to use, apply it, send the
		# message, and record in history table;
		alias_addx.realname = Address(mailfrom).realname

		msg.replace_address('from', None, alias_addx)
		#del msg['message-id']

		# Nothing left to deliver to once the alias address was removed.
		if rcpttos == []:
			logging.info('No recipients left; ignoring');
			return

		# Recipients for which is_servername() is true are routed through
		# forward(); all others are sent directly.
		rcpt_aliases = []
		rcpt_nonaliases = []

		for entry in rcpttos:
			rcpt_addx = Address(entry)
			if rcpt_addx.is_servername():
				rcpt_aliases.append(entry)
			else:
				rcpt_nonaliases.append(entry)

		self.send(str(alias_addx), rcpt_nonaliases, msg)
		self.forward(str(alias_addx), rcpt_aliases, msg)

		self.db.add_history(usralias.get_rid(),
				    True,
				    address.getaddresses(rcpttos),
				    msg['Message-ID'],
				    user.get_salt())

		return
Example #16
0
	def forward(self, mailfrom, rcpttos, msg):
		""" Handles Case 2, where email is not from a service user and so
		needs to be forwarded to various aliases.

		mailfrom -- sender address string
		rcpttos  -- iterable of recipient address strings
		msg      -- message; its To / Reply-To / Message-ID headers may be
		            rewritten in place before it is sent on

		For every recipient that parses as an alias address and has alias
		data, the message is sent to the owning user's forwarding address
		and a history entry is added.  For an untrusted alias the sender
		must be a trusted correspondent; otherwise the captcha state decides
		what happens and the message is not forwarded.

		Always returns None.
		"""
		for rcpt in rcpttos:
			prcpt = Address(rcpt)
			alias_pair = prcpt.parse_alias_address()
			logging.debug(rcpt)
			if not alias_pair:
				# if the domain is SERVERNAME, sender screwed up; return error to sender...
				if prcpt.is_servername():
					logging.info('Encountered improperly formatted '
						     'address "%s" in recipients field', prcpt.address)

					# Create error response message
					err = ErrorMessage('forward.badformat',
							   fromaddx	= self.cfg.SVCALIAS,
							   toaddx	= mailfrom,
							   badalias	= prcpt.address)

					self.send(err['From'], [mailfrom], err)

				# ... otherwise ignore; not our job to send to non-users
				# NOTE(review): this log line is not in an else branch, so it
				# is also emitted after the bad-format error above.
				logging.info('Encountered recipient outside our domain; ignoring')

			else:
				alias_data = self.db.get_alias_data(*alias_pair)

				if alias_data:
					fwd_alias = Alias(**alias_data)

					userdata = self.db.get_user(uid=fwd_alias.get_uid())
					assert userdata is not None
					user = User(**userdata)

					logging.debug('is trusted? %s', fwd_alias.is_trusted())

					# handle trustedness here;
					if not fwd_alias.is_trusted():

						mfrom = Address(mailfrom)

						# if sender is in trusted group, then it's all right;
						if self.db.is_trusted_correspondent(mfrom,			\
										    user.get_salt(),		\
										    fwd_alias.get_rid(),	\
										    fwd_alias.get_trusted_timestamp()):
							pass # TODO: send/append something about newer alias to send to?

						else:
							capstat = self.db.get_capstat(mfrom,		\
										      user.get_salt(),	\
										      fwd_alias.get_rid())

							logging.debug('capstat=%s', capstat)

							if capstat < self.db.CAPTCHA_PENDING:
								logging.debug('captcha not yet sent; trying to send one')
								# If not approved, send captcha to sender and drop mail.
								# TODO: Perhaps we can cache the mail somewhere.
								cid = self.db.get_cid(mfrom, user.get_salt())
								self.send_captcha(mailfrom, cid, fwd_alias)
								#self.db.set_capstat(cid,
								#		    fwd_alias.get_rid(),
								#		    self.db.CAPTCHA_PENDING)
								logging.debug('done sending captcha')

							elif capstat == self.db.CAPTCHA_PENDING:
								logging.debug('captcha was already sent; still waiting for solution')

							elif capstat == self.db.CAPTCHA_APPROVED:
								logging.debug('captcha approved, but not yet user approved')
								# if user denied,
								# TODO: just ignore? or do something more?
								#	pass

								# if user judgement pending, send message
								# informing them they must wait for user's approval?
								# NOTE(review): capstat equals CAPTCHA_APPROVED
								# here, so this test can only pass if
								# USER_PENDING has the same value.
								if capstat == self.db.USER_PENDING:
									pass # TODO: send message

							# NOTE(review): this leaves forward() entirely, so
							# recipients after this one are not processed --
							# confirm that 'continue' was not intended.
							return

					# TODO: can consult a whitelist/blacklist/etc. here

					fwd_addx = Address(user.get_forwarding_address())
					fwd_addx.realname = prcpt.realname

					logging.info('Found alias for account (%s) Forwarding message to %s', \
						      user.get_username(), fwd_addx.address)

					# Add hint as recipient name. The hint/domain is used as a reminder
					# to the user where this email address was originally created for.
					# But since we did not update Reply-To, it will drop off when the
					# user replies to the message.
					rcptaddr = Address(rcpt)

					if rcptaddr.get_realname() == '':
						if fwd_alias.get_hint() != None:
							rcptaddr.set_realname(fwd_alias.get_hint())

						elif fwd_alias.get_domain() != None:
							rcptaddr.set_realname(fwd_alias.get_domain())

						msg.replace_address('To', rcpt, rcptaddr)

					acct_addx = Address(user.get_account_address())
					acct_addx.realname = prcpt.realname

					#del msg['message-id']
					#del msg['DKIM-Signature']

					# Append the user's account address to an existing To header.
					if 'To' in msg:
						msg.replace_header('To', msg['To'] + ', ' + str(acct_addx))

					# Make sure Reply-To carries the alias address (rcpt);
					# a new header also lists the original sender.
					if 'Reply-To' in msg:
						msg.replace_header('Reply-To', msg['reply-to'] + ', ' + rcpt);
					else:
						msg.add_header('Reply-To', mailfrom + ', ' + rcpt);

					if 'Message-ID' not in msg:
						msg.generate_message_id(self.cfg.DOMAIN)

					self.send(mailfrom, [str(fwd_addx)], msg)
					self.db.add_history(fwd_alias.get_rid(), False, [Address(mailfrom)], msg['Message-ID'], user.get_salt())

				else:
					logging.info("Couldn't find data for alias (%s,%d)", *alias_pair)

		return
Example #17
0
	def create_alias_helper(self, user, aliasname, \
	primary=False, rcpt=None, trusted=True, hint=None):
		""" Helper function to create alias.
		Generates <rand> for user for the <aliasname> specified.
		If <aliasname> belonging to the user already exists, the existing aid is used.
		If <aliasname> belonging to another user already exists, an error is returned.

		user      -- owner of the alias; queried for uid, salt and addresses
		aliasname -- name part of the alias
		primary   -- passed to db.insert_alias when a new aliasname is created
		rcpt      -- optional recipient; when given, the history is searched
		             for a previously generated <rand> to reuse
		trusted   -- passed to db.insert_aliasrnd
		hint      -- passed to db.insert_aliasrnd

		Returns the Address of the resulting alias, or None when the
		aliasname belongs to another user (an error mail is sent first) or
		when no rid could be obtained.
		"""
		(aid, uid) = self.db.get_aliasname_data(aliasname)

		# Error if user doesn't own the aliasname
		if uid != None and uid != user.get_uid():
			logging.info('User %d does not own "%s".', user.get_uid(), aliasname)

			# Create error response message
			err = ErrorMessage('createalias.notowner',
					   fromaddx	 = self.cfg.GETALIAS,
				           toaddx	 = user.get_account_address(),
					   aliasname	 = aliasname)

			self.send(err['From'], [user.get_forwarding_address()], err)
			return None

		#
		# Now, aliasname either belongs to the user or is not in use.
		#

		# Gets the alias id, either by getting an existing one or create a new one.
		if uid == user.get_uid():
			newaid = aid
			logging.debug('Using existing aid %d for aliasname "%s"',	\
					newaid, aliasname)
		elif uid == None:
			newaid = self.db.insert_alias(user.get_uid(), aliasname, primary)
			logging.debug('Created new aid %d for aliasname "%s"',	\
					newaid, aliasname)
		else:
			# Defensive: the ownership check above already returned for
			# every other uid.
			return None

		#
		# If a recipient is given, check history to see if there was any
		# previously generated <rand> that we can use.
		# TODO: We might have to make sure the recipient is active.
		#

		newalias = None
		cid = None
		if rcpt != None:
			cid = self.db.peek_cid(rcpt, user.get_salt())

		rid = None
		if cid != None:
			rid = self.db.get_history_rid(aliasname, cid)
			if rid != None:
				# Found a history correspondence
				# NOTE(review): other call sites in this file build the
				# object with Alias(**alias_data); here the data is passed
				# as a single positional argument -- confirm this is intended.
				hist_alias = Alias(self.db.get_alias_data(rid))
				hist_aliasname, hist_aliasrand = hist_alias.get_alias_pair()

				logging.debug('History aliasname\t:"%s"', hist_aliasname)

				if hist_aliasname == aliasname:
					logging.debug('Reuse history aliasrand\t:"%s"', \
							hist_aliasrand)
					newalias = Alias(hist_aliasname, hist_aliasrand)
				else:
					# Can't use the rid found since aliasname differs
					rid = None


		# Create a new alias (aka aliasrand or <aliasname>.<rand>)
		if newalias == None:
			logging.debug('Generating new aliasrand')
			newalias = Alias(aliasname, alias.generate_rint())

		logging.debug('Using alias\t\t: %s', newalias)

		# Update aid, uid and set isactive for new alias
		newalias.set_values(aid=newaid, uid=user.get_uid(), isactive=1)

		# Sets up alias pair
		alias_pair = newalias.get_alias_pair()

		# If we don't have rid yet, insert aliasrand to DB and mark as active
		if rid == None:
			rid = self.db.insert_aliasrnd(user.get_uid(),	\
				newaid,					\
				alias_pair[0], alias_pair[1],		\
				1, trusted, hint)
		# Still no rid after the insert attempt: give up.
		if rid == None:
			return None

		# Looks like this double counts in the history table;
		#if rcpt != None:
		#	self.db.add_history(rid, True, [rcpt], user.get_salt())


		# Creates the alias address, which includes the domain
		aliasaddx = Address(newalias.get_alias_address())
		logging.info('Aliasrnd Address\t\t: %s', str(aliasaddx))
		return aliasaddx
Example #18
0
def resolve_aliases(slug, inputs):
    print_flag = 0
    #out = open("merge_dump/"+slug.replace("/", "_____"), "w")

    unmask = {}
    aliases = {}

    # Helper structures
    d_email_uid = {}
    d_uid_email = {}

    d_prefix_uid = {}
    d_uid_prefix = {}

    d_comp_prefix_uid = {}
    d_uid_comp_prefix = {}

    d_uid_domain = {}
    d_domain_uid = {}

    d_name_uid = {}
    d_uid_name = {}

    d_name_parts_uid = {}
    d_uid_name_parts = {}

    d_name_app_uid = {}
    d_uid_app_parts = {}

    d_login_uid = {}
    d_uid_login = {}

    d_location_uid = {}
    d_uid_location = {}

    d_uid_type = {}
    #d_type_usr = {}

    uid = 0

    # raw = {}

    for ind, row in inputs.iterrows():
        uid = row["id"]
        name = row["name"]
        email = row["email"]
        # raw[uid] = line
        login = row["login"]  #None #row[1].strip()
        if row["type"] == None:
            user_type = ""
        else:
            user_type = str(row["type"])  #None
        if row["location"] == None:
            location = ""
        else:
            location = str(row["location"])  #None
        # try:
        #     name = line.split('<')[0].strip()
        #     email = line.split('<')[1].strip().split('>')[0].strip()
        # except:
        #     print line
        #     exit()

        unmask[uid] = uid

        if row["record_type"] == 1:
            record_type = USR_REAL
        else:
            record_type = USR_FAKE
    #     m = fakeusr_rex.search(login)
    #     if m is not None:
    #         record_type = USR_FAKE
    #     else:
    #         record_type = USR_REAL

        a = Alias(record_type, uid, login, name, email, location, user_type)
        aliases[uid] = a

        # - email
        d_uid_email[a.uid] = a.email
        if a.email is not None:
            d_email_uid.setdefault(a.email, set([a.uid]))
            d_email_uid[a.email].add(a.uid)

        # - prefix
        d_uid_prefix[a.uid] = a.email_prefix
        d_uid_comp_prefix[a.uid] = a.email_prefix
        if a.email_prefix is not None:
            if len(a.email_prefix.split('.')) > 1 or len(
                    a.email_prefix.split('_')) > 1:
                d_comp_prefix_uid.setdefault(a.email_prefix, set([a.uid]))
                d_comp_prefix_uid[a.email_prefix].add(a.uid)
            else:
                d_prefix_uid.setdefault(a.email_prefix, set([a.uid]))
                d_prefix_uid[a.email_prefix].add(a.uid)

        # - domain
        d_uid_domain[a.uid] = a.email_domain
        if a.email_domain is not None:
            d_domain_uid.setdefault(a.email_domain, set([a.uid]))
            d_domain_uid[a.email_domain].add(a.uid)

        # - login
        d_uid_login[a.uid] = a.login
        if a.login is not None:
            d_login_uid.setdefault(a.login, set([a.uid]))
            d_login_uid[a.login].add(a.uid)

            if a.record_type == USR_REAL:
                d_login_uid.setdefault(a.login.lower(), set([a.uid]))
                d_login_uid[a.login.lower()].add(a.uid)

        # type
        d_uid_type[a.uid] = a.usr_type

        # - name
        d_uid_name[a.uid] = a.name
        if a.name is not None and len(a.name):
            d_name_uid.setdefault(a.name, set([a.uid]))
            d_name_uid[a.name].add(a.uid)

            if len(a.name.split(' ')) == 1:
                d_name_uid.setdefault(a.name.lower(), set([a.uid]))
                d_name_uid[a.name.lower()].add(a.uid)

                # janejohnson -> janejohnson
                # we need this for matching
                d_name_app_uid.setdefault(a.name.lower(), set([a.uid]))
                d_name_app_uid[a.name.lower()].add(a.uid)

            # jane johnson -> janejohnson
            d_name_app_uid.setdefault("".join(a.name.split(" ")).lower(),
                                      set([a.uid]))
            d_name_app_uid["".join(a.name.split(" ")).lower()].add(a.uid)

            if "@" in a.name:  # otherwise it will make "gmail", "com" as names
                name_subpart = a.name.split("@")[0]
                d_name_parts_uid.setdefault(name_subpart.lower(), set([a.uid]))
                d_name_parts_uid[name_subpart.lower()].add(a.uid)
            else:
                # xiyi ji -> ji xiyi
                name_parts_split = a.name.lower().replace(",", " ").replace(
                    ".", " ").split(' ')
                if len(name_parts_split) != 2:
                    continue
                new_name_parts = name_parts_split[-1] + " " + name_parts_split[
                    0]
                d_name_parts_uid.setdefault(new_name_parts, set([a.uid]))
                d_name_parts_uid[new_name_parts].add(a.uid)

        # - location
        d_uid_location[a.uid] = a.location
        if a.location is not None and len(a.location):
            d_location_uid.setdefault(a.location, set([a.uid]))
            d_location_uid[a.location].add(a.uid)

        # idx += 1
        # if idx >= curidx:
        #     print curidx/step
        #     curidx += step

    # print 'Done: helpers'

    clues = {}

    for email, set_uid in d_email_uid.items():
        if len(set_uid) > THR_MIN:
            for a, b in combinations(sorted(set_uid, key=lambda uid: int(uid)),
                                     2):
                clues.setdefault((a, b), [])
                clues[(a, b)].append(EMAIL)
    #                print a,b,EMAIL

    # print 'Done: email'

    for prefix, set_uid in d_comp_prefix_uid.items():
        if len(set_uid) > THR_MIN and len(set_uid) < THR_MAX:
            if len(prefix) >= 3:
                for a, b in combinations(
                        sorted(set_uid, key=lambda uid: int(uid)), 2):
                    clues.setdefault((a, b), [])
                    clues[(a, b)].append(COMP_EMAIL_PREFIX)
    #                    print a,b,COMP_EMAIL_PREFIX

    # print 'Done: comp email prefix'

    for prefix, set_uid in d_prefix_uid.items():
        if len(set_uid) > THR_MIN and len(set_uid) < THR_MAX:
            if len(prefix) >= 3:
                for a, b in combinations(
                        sorted(set_uid, key=lambda uid: int(uid)), 2):
                    clues.setdefault((a, b), [])
                    clues[(a, b)].append(SIMPLE_EMAIL_PREFIX)
    #                    print a,b,SIMPLE_EMAIL_PREFIX

    # print 'Done: email prefix'

    for prefix in set(d_prefix_uid.keys()).intersection(set(
            d_login_uid.keys())):
        if len(d_prefix_uid[prefix]) < THR_MAX:
            for a, b in product(
                    sorted(d_login_uid[prefix], key=lambda uid: int(uid)),
                    sorted(d_prefix_uid[prefix], key=lambda uid: int(uid))):
                if a < b:
                    clues.setdefault((a, b), [])
                    if not SIMPLE_EMAIL_PREFIX in clues[(a, b)]:
                        clues[(a, b)].append(PREFIX_LOGIN)
    #                    print a,b,PREFIX_LOGIN

    # print 'Done: prefix=login'

    for prefix in set(d_prefix_uid.keys()).intersection(set(
            d_name_uid.keys())):
        if len(d_prefix_uid[prefix]) < THR_MAX and len(
                d_name_uid[prefix]) < THR_MAX:
            for a, b in product(
                    sorted(d_name_uid[prefix], key=lambda uid: int(uid)),
                    sorted(d_prefix_uid[prefix], key=lambda uid: int(uid))):
                if a < b:
                    clues.setdefault((a, b), [])
                    if not SIMPLE_EMAIL_PREFIX in clues[(a, b)]:
                        clues[(a, b)].append(PREFIX_NAME)

    # print 'Done: prefix=name'

    for prefix in set(d_login_uid.keys()).intersection(set(d_name_uid.keys())):
        if len(d_name_uid[prefix]) < THR_MAX:
            for a, b in product(
                    sorted(d_name_uid[prefix], key=lambda uid: int(uid)),
                    sorted(d_login_uid[prefix], key=lambda uid: int(uid))):
                if a < b:
                    clues.setdefault((a, b), [])
                    if not SIMPLE_EMAIL_PREFIX in clues[(a, b)]:
                        clues[(a, b)].append(LOGIN_NAME)

    # print 'Done: login=name'

    for name, set_uid in d_name_uid.items():
        if len(set_uid) > THR_MIN and len(set_uid) < THR_MAX:
            if len(name.split(' ')) > 1:
                for a, b in combinations(
                        sorted(set_uid, key=lambda uid: int(uid)), 2):
                    clues.setdefault((a, b), [])
                    clues[(a, b)].append(FULL_NAME)
            else:
                for a, b in combinations(
                        sorted(set_uid, key=lambda uid: int(uid)), 2):
                    clues.setdefault((a, b), [])
                    clues[(a, b)].append(SIMPLE_NAME)

    # print 'Done: full/simple name'

    for name, set_uid in d_name_parts_uid.items():
        #out.write(name + "," + str(set_uid) + "\n")
        if len(set_uid) > THR_MIN and len(set_uid) < THR_MAX:
            for a, b in combinations(sorted(set_uid, key=lambda uid: int(uid)),
                                     2):
                clues.setdefault((a, b), [])
                clues[(a, b)].append(NAME_PARTS)

    #out.write("\n")

    # print 'Done: name parts'

    for name, set_uid in d_name_app_uid.items():
        #out.write(name + "," + str(set_uid))
        if len(set_uid) > THR_MIN and len(set_uid) < THR_MAX:
            for a, b in combinations(sorted(set_uid, key=lambda uid: int(uid)),
                                     2):
                clues.setdefault((a, b), [])
                clues[(a, b)].append(NAME_APPENDED)

    # print 'Done: name parts appended'

    for domain, set_uid in d_domain_uid.items():
        if len(set_uid) > THR_MIN and len(set_uid) < THR_MAX:
            for a, b in combinations(sorted(set_uid, key=lambda uid: int(uid)),
                                     2):
                clues.setdefault((a, b), [])
                clues[(a, b)].append(DOMAIN)

    # print 'Done: email domain'

    for location, set_uid in d_location_uid.items():
        if len(set_uid) > THR_MIN:
            for a, b in combinations(sorted(set_uid, key=lambda uid: int(uid)),
                                     2):
                na = d_uid_name[a]
                nb = d_uid_name[b]
                if na is not None and nb is not None and len(
                        na.split()) > 1 and na == nb:
                    if len(d_name_uid.get(na, set([]))) < THR_MAX:
                        clues.setdefault((a, b), [])
                        clues[(a, b)].append(LOCATION)

    # print 'Done: location'

    d_alias_map = {}
    clusters = {}
    labels = {}

    def merge(a, b, rule):
        """Put uids ``a`` and ``b`` in the same cluster and record ``rule``.

        Works on three dicts from the enclosing scope: ``d_alias_map``
        (uid -> key of its cluster), ``clusters`` (key -> set of member
        uids) and ``labels`` (key -> list of rules that fired).
        Callers must pass ``a < b``.
        """
        assert a < b, "A must be less than B"
        a_known = a in d_alias_map
        b_known = b in d_alias_map

        if not a_known and not b_known:
            # Neither uid seen before: open a new cluster keyed by a.
            d_alias_map[a] = a
            d_alias_map[b] = a
            clusters[a] = set([a, b])
            labels[a] = [rule]
            return

        if a_known != b_known:
            # Exactly one uid is already clustered; the other one joins it.
            known, newcomer = (a, b) if a_known else (b, a)
            key = d_alias_map[known]
            d_alias_map[newcomer] = key
            clusters[key].add(newcomer)
            labels[key].append(rule)
            return

        key_a = d_alias_map[a]
        key_b = d_alias_map[b]
        if key_a == key_b:
            # Already together: just remember that another rule fired.
            labels[key_a].append(rule)
            return

        # Two distinct clusters: fold the higher-keyed one into the lower.
        lowest = min(key_a, key_b)
        highest = max(key_a, key_b)
        labels[lowest].extend(labels[highest])
        labels[lowest].append(rule)
        clusters[lowest].update(clusters[highest])
        for member in clusters[highest]:
            d_alias_map[member] = lowest
        del labels[highest]
        del clusters[highest]
        d_alias_map[a] = lowest
        d_alias_map[b] = lowest

    for (a, b), list_clues in sorted(clues.items(),
                                     key=lambda e:
                                     (int(e[0][0]), int(e[0][1]))):
        if print_flag:
            print(((a, b), list_clues))
        aa = aliases[a]
        ab = aliases[b]

        if EMAIL in list_clues:
            merge(a, b, EMAIL)
        elif len(set(list_clues)) >= 2:
            for clue in set(list_clues):
                merge(a, b, clue)
    #            merge(a,b,TWO)
        elif FULL_NAME in list_clues:
            merge(a, b, FULL_NAME)
        elif NAME_APPENDED in list_clues:
            merge(a, b, NAME_APPENDED)
        elif NAME_PARTS in list_clues:
            merge(a, b, NAME_PARTS)
        elif COMP_EMAIL_PREFIX in list_clues:
            merge(a, b, COMP_EMAIL_PREFIX)
        elif SIMPLE_NAME in list_clues:
            merge(a, b, SIMPLE_NAME)
        elif PREFIX_NAME in list_clues:
            merge(a, b, PREFIX_NAME)

    # print 'Done: clusters'

    for uid, member_uids in clusters.items():
        # print ((uid, member_uids))
        members = [aliases[m] for m in member_uids]

        # Count fake/real
        c = Counter([m.record_type for m in members])
        real = [m for m in members if m.record_type == USR_REAL]
        with_location = [m for m in real if m.location is not None]
        fake = [m for m in members if m.record_type == USR_FAKE]

        # Count rules that fired
        cl = Counter(labels[uid])
        if print_flag:
            print(cl)

        is_valid = False

        # If all have the same email there is no doubt
        if cl.get(EMAIL, 0) >= (len(members) - 1):
            is_valid = True
        # If all the REALs have the same email, assume all the FAKEs are this REAL
        elif len(Counter([m.email for m in real]).keys()) == 1:
            is_valid = True
        # If there is at most one real, at least two rules fired, and each rule applied to each pair
        elif len(cl.keys()) > 1 and min(cl.values()) >= (len(members) - 1):
            is_valid = True
        # At most one real, the only rule that fired is COMP_EMAIL_PREFIX or FULL_NAME
        elif len(cl.keys()) == 1 and \
                (cl.get(COMP_EMAIL_PREFIX,0) or cl.get(FULL_NAME,0) or \
                 cl.get(NAME_PARTS,0) or cl.get(NAME_APPENDED,0)):
            is_valid = True
        # All with same full name and location / same full name and email domain
        elif cl.get(FULL_NAME,0) >= (len(members)-1) and \
                (cl.get(LOCATION,0) >= (len(members)-1) or cl.get(DOMAIN,0) >= (len(members)-1)):
            is_valid = True
        # All same composite email prefix / same full name
        elif (cl.get(COMP_EMAIL_PREFIX, 0) >= (len(members) - 1)
              or cl.get(FULL_NAME, 0) >= (len(members) - 1)):
            is_valid = True
        elif cl.get(NAME_APPENDED, 0) >= (len(members) - 1):
            is_valid = True
        elif cl.get(FULL_NAME, 0) >= (len(members) - 1):
            is_valid = True
        # The only two rules that fired are full name and email, in some combination
        elif len(cl.keys()) == 2 and cl.get(FULL_NAME, 0) > 0 and cl.get(
                EMAIL, 0) > 0:
            is_valid = True
        elif len(cl.keys()) == 3 and cl.get(FULL_NAME, 0) > 0 and cl.get(
                EMAIL, 0) > 0 and cl.get(SIMPLE_NAME, 0) > 0:
            is_valid = True
        elif len(cl.keys()) == 2 and cl.get(EMAIL, 0) > 0 and cl.get(
                SIMPLE_NAME, 0) > 0:
            is_valid = True
        elif cl.get(PREFIX_NAME, 0) > 0:
            is_valid = True
        elif cl.get(SIMPLE_NAME,0) > 0 and cl.get(FULL_NAME,0) > 0 \
            and cl.get(SIMPLE_EMAIL_PREFIX,0) > 0 and cl.get(EMAIL,0) > 0:
            is_valid = True
        elif cl.get(SIMPLE_NAME, 0) > 0:
            is_valid = True
        elif cl.get(NAME_PARTS, 0) >= (len(members) - 1):
            is_valid = True
        else:
            # is_valid = True
            # continue
            # Split by email address if at least 2 share one
            if cl.get(EMAIL, 0):
                ce = [
                    e for e, c in Counter([m.email for m in members]).items()
                    if c > 1
                ]
                for e in ce:
                    extra_members = [m for m in members if m.email == e]
                    extra_real = [
                        m for m in extra_members if m.record_type == USR_REAL
                    ]
                    extra_with_location = [
                        m for m in extra_real if m.location is not None
                    ]

                    if len(extra_real):
                        if len(extra_with_location):
                            # Pick the one with the oldest account with location, if available
                            rep = sorted(extra_with_location,
                                         key=lambda m: int(m.uid))[0]
                        else:
                            # Otherwise pick the one with the oldest account
                            rep = sorted(extra_real,
                                         key=lambda m: int(m.uid))[0]
                    else:
                        rep = sorted(extra_members,
                                     key=lambda m: int(m.uid))[0]

                    # w_log.writerow([])
                    # w_log.writerow([rep.uid, rep.login, rep.name, rep.email, rep.location])
                    for a in extra_members:
                        if a.uid != rep.uid:
                            # w_log.writerow([a.uid, a.login, a.name, a.email, a.location])
                            # writer.writerow([a.uid, rep.uid])
                            unmask[a.uid] = rep.uid
                            # print ('Mapped:' + str((a.uid, rep.uid)))

            # w_maybe.writerow([])
            # w_maybe.writerow([str(cl.items())])
            if print_flag:
                print(str(cl.items()))
                for m in members:
                    print([m.uid, m.name, m.email])
                # w_maybe.writerow([m.uid, m.login, m.name, m.email, m.location])

        if is_valid:
            # Determine group representative
            if len(real):
                if len(with_location):
                    # Pick the one with the oldest account with location, if available
                    rep = sorted(with_location, key=lambda m: int(m.uid))[0]
                else:
                    # Otherwise pick the one with the oldest account
                    rep = sorted(real, key=lambda m: int(m.uid))[0]
            else:
                rep = sorted(members, key=lambda m: int(m.uid))[0]

            # w_log.writerow([])
            # w_log.writerow([str(cl.items())])
            # w_log.writerow([rep.uid, rep.login, rep.name, rep.email, rep.location])
            for a in members:
                if a.uid != rep.uid:
                    # w_log.writerow([a.uid, a.login, a.name, a.email, a.location])
                    # writer.writerow([a.uid, rep.uid])
                    unmask[a.uid] = rep.uid
                    if print_flag:
                        print('Mapped:' + str((a.uid, rep.uid)))

    return unmask
Example #19
0
def main(args, app):
  """Create an alias from the command-line arguments and persist it.

  Builds an ``Alias`` from ``args.name`` and ``args.command``, stores it in
  ``app.config``, saves the config and prints a confirmation line.
  """
  new_alias = Alias(name=args.name, command=args.command)
  new_alias.store(app.config)
  app.config.save()
  # Single-argument print() emits "added <url>" on both Python 2 and 3;
  # the former print statement is a SyntaxError on Python 3.
  print("added %s" % (new_alias.format_url(),))
Example #20
0
    def add_alias(self, new_cpp_type_name, old_cpp_type_name):
        """Register ``new_cpp_type_name`` as an alias of ``old_cpp_type_name``.

        The direct alias is registered with the type manager and declared on
        the extension first.  The same is then done for the const-pointer,
        pointer, const-reference and reference forms of the type, and for
        each of those a pair of ``Make_*`` KL functions is appended to the
        extension's epilog.

        Returns the direct ``Alias`` declaration.  If any step raises, a
        warning is emitted through ``self.ext.warning`` and an
        ``EmptyCommentContainer`` is returned instead.
        """
        # KL boilerplate shared by every pointer/reference flavour.
        make_template = (
            "\n"
            "%s Make_%s(%s value) {\n"
            "  return Make_%s(value);\n"
            "}\n"
            "\n"
            "%s Make_%s(io %s value) {\n"
            "  return Make_%s(value);\n"
            "}\n"
        )
        try:
            direct_new_cpp_global_expr = self.cpp_type_expr_parser.parse(
                new_cpp_type_name).prefix(self.components)
            direct_old_cpp_global_expr = self.resolve_cpp_type_expr(
                old_cpp_type_name)
            self.type_mgr.add_alias(direct_new_cpp_global_expr,
                                    direct_old_cpp_global_expr)
            direct_new_kl_global_name = '_'.join(self.nested_kl_names +
                                                 [new_cpp_type_name])
            direct_old_dqti = self.type_mgr.get_dqti(
                direct_old_cpp_global_expr)
            # Diagnostic output; print() with one argument behaves the same
            # on Python 2 and Python 3.
            print("direct_old_dqti.type_info.kl.name = " + str(
                direct_old_dqti.type_info.kl.name))
            print("direct_old_dqti.type_info.edk.name = " + str(
                direct_old_dqti.type_info.edk.name))
            print("direct_old_dqti.type_info.lib.name = " + str(
                direct_old_dqti.type_info.lib.name))
            print("direct_old_dqti.type_info.lib.expr = " + str(
                direct_old_dqti.type_info.lib.expr))
            direct_alias = Alias(self, direct_new_kl_global_name,
                                 direct_old_dqti.type_info)
            self.ext.add_decl(direct_alias)

            # (KL name suffix, C++ type-expression wrapper), in the order the
            # derived aliases must be registered.  Lambdas keep the wrapper
            # names resolved lazily, at the point each one is first needed.
            variants = (
                ("_CxxConstPtr", lambda expr: PointerTo(Const(expr))),
                ("_CxxPtr", lambda expr: PointerTo(expr)),
                ("_CxxConstRef", lambda expr: ReferenceTo(Const(expr))),
                ("_CxxRef", lambda expr: ReferenceTo(expr)),
            )
            for suffix, wrap in variants:
                new_cpp_type_expr = wrap(direct_new_cpp_global_expr)
                old_cpp_type_expr = wrap(direct_old_cpp_global_expr)
                self.type_mgr.add_alias(new_cpp_type_expr, old_cpp_type_expr)
                new_kl_type_name = direct_new_kl_global_name + suffix
                old_dqti = self.type_mgr.get_dqti(old_cpp_type_expr)
                old_kl_type_name = old_dqti.type_info.kl.name.compound
                self.ext.add_decl(
                    Alias(self, new_kl_type_name, old_dqti.type_info))
                self.ext.add_kl_epilog(make_template % (
                    new_kl_type_name,
                    new_kl_type_name,
                    direct_new_kl_global_name,
                    old_kl_type_name,
                    new_kl_type_name,
                    new_kl_type_name,
                    direct_new_kl_global_name,
                    old_kl_type_name,
                ))

            return direct_alias
        except Exception as e:
            # Deliberate best-effort: a type that cannot be aliased is
            # reported and skipped rather than aborting the whole run.
            self.ext.warning("Ignoring alias '%s': %s" %
                             (new_cpp_type_name, e))
            return EmptyCommentContainer()
Example #21
0
def main(args, app):
    """Print the aliases from ``app.config`` selected by ``args.name``.

    Every entry yielded by ``Alias.FromConfig(app.config)`` is printed via
    ``args.format`` when ``args.name`` is ``'*'`` or equals the entry's name.
    """
    for device in Alias.FromConfig(app.config):
        if args.name not in ('*', device.name):
            continue
        print(args.format(device))
Example #22
0
def _index_uid(index, key, uid):
    """Add ``uid`` to the set stored under ``key`` in the dict ``index``."""
    index.setdefault(key, set()).add(uid)


def _add_pairwise_clues(clues, uids, clue):
    """Append ``clue`` to every pair ``(a, b)``, ``a < b``, drawn from ``uids``."""
    for pair in combinations(sorted(uids), 2):
        clues.setdefault(pair, []).append(clue)


def _add_cross_clues(clues, left_uids, right_uids, clue):
    """Append ``clue`` to pairs ``(a, b)``, ``a`` from left, ``b`` from right, ``a < b``.

    The pair is always registered in ``clues``, but ``clue`` is only appended
    when the pair has not already been tagged with SIMPLE_EMAIL_PREFIX.
    """
    for a, b in product(sorted(left_uids), sorted(right_uids)):
        if a < b:
            pair_clues = clues.setdefault((a, b), [])
            if SIMPLE_EMAIL_PREFIX not in pair_clues:
                pair_clues.append(clue)


def main(input_dir_path: str, out_dir_path: str) -> None:
    """Merge contributor identities and write the resulting alias map.

    Reads every ``*contributors.csv`` under ``input_dir_path``, collects
    clues (same e-mail, e-mail prefix, name, login, domain) linking pairs of
    contributor ids, clusters the linked ids and maps each cluster member to
    the smallest uid of its cluster.  Results go to ``<out_dir_path>/idm``:
    ``idm_log.csv``, ``idm_map.csv``, ``idm_maybe.csv`` and the pickled
    mapping ``dict/aliasMap.dict``.

    NOTE(review): ``merge``, ``clusters`` and ``labels`` are not defined in
    this function; they are presumably module-level -- confirm.
    """
    log.info("Input dir: %s; out_dir: %s", input_dir_path, out_dir_path)
    out_dir = os.path.join(os.path.abspath(out_dir_path), 'idm')
    # Creating the nested 'dict' folder also creates out_dir itself.
    os.makedirs(os.path.join(out_dir, 'dict'), exist_ok=True)

    fakeusr_rex = regex.compile(r'\A[A-Z]{8}$')

    unmask = {}

    w_log = CsvWriter(csv_file=os.path.join(out_dir, 'idm_log.csv'))
    writer = CsvWriter(csv_file=os.path.join(out_dir, 'idm_map.csv'))
    w_maybe = CsvWriter(csv_file=os.path.join(out_dir, 'idm_maybe.csv'))

    step = 100000
    processed = 0

    aliases = {}

    # Helper indexes: attribute value -> set of uids sharing that value.
    d_email_uid = {}
    d_prefix_uid = {}
    d_comp_prefix_uid = {}
    d_domain_uid = {}
    d_name_uid = {}
    d_login_uid = {}

    df = utility.read_from_folder(input_dir_path, "*contributors.csv")

    users = [
        SzzContributor(row.CONTRIBUTOR_ID, row.NAME, row.EMAIL)
        for row in df.itertuples(index=False)
    ]
    log.info("Users to parse: %d", len(users))

    for user in users:
        uid = user.id
        # The input has no separate login column; the name doubles as login.
        login = name = user.name
        email = user.email

        # Compare by value: identity checks against string literals are
        # unreliable and never matched values read from the input.
        if name == "github" and email == "*****@*****.**":
            continue

        unmask[uid] = uid

        if fakeusr_rex.search(login) is not None:
            record_type = USR_FAKE
        else:
            record_type = USR_REAL

        a = Alias(record_type, uid, login, name, email)
        aliases[uid] = a

        # - email
        if a.email is not None:
            _index_uid(d_email_uid, a.email, a.uid)

        # - prefix (composite prefixes contain '.' or '_')
        if a.email_prefix is not None:
            if '.' in a.email_prefix or '_' in a.email_prefix:
                _index_uid(d_comp_prefix_uid, a.email_prefix, a.uid)
            else:
                _index_uid(d_prefix_uid, a.email_prefix, a.uid)

        # - domain
        if a.email_domain is not None:
            _index_uid(d_domain_uid, a.email_domain, a.uid)

        # - login (real accounts are also indexed case-insensitively)
        if a.login is not None:
            _index_uid(d_login_uid, a.login, a.uid)
            if a.record_type == USR_REAL:
                _index_uid(d_login_uid, a.login.lower(), a.uid)

        # - name (single-word names are also indexed case-insensitively)
        if a.name is not None and len(a.name):
            _index_uid(d_name_uid, a.name, a.uid)
            if ' ' not in a.name:
                _index_uid(d_name_uid, a.name.lower(), a.uid)

        processed += 1
        if processed % step == 0:
            log.info("%d / 30", processed // step)

    log.info('Done: helpers')

    # (uid_a, uid_b) with uid_a < uid_b -> list of clue labels for that pair.
    clues = {}

    for set_uid in d_email_uid.values():
        if len(set_uid) > THR_MIN:
            _add_pairwise_clues(clues, set_uid, EMAIL)
    log.info('Done: email')

    for prefix, set_uid in d_comp_prefix_uid.items():
        if THR_MIN < len(set_uid) < THR_MAX and len(prefix) >= 3:
            _add_pairwise_clues(clues, set_uid, COMP_EMAIL_PREFIX)
    log.info('Done: comp email prefix')

    for prefix, set_uid in d_prefix_uid.items():
        if THR_MIN < len(set_uid) < THR_MAX and len(prefix) >= 3:
            _add_pairwise_clues(clues, set_uid, SIMPLE_EMAIL_PREFIX)
    log.info('Done: email prefix')

    for prefix in d_prefix_uid.keys() & d_login_uid.keys():
        if len(d_prefix_uid[prefix]) < THR_MAX:
            _add_cross_clues(clues, d_login_uid[prefix], d_prefix_uid[prefix],
                             PREFIX_LOGIN)
    log.info('Done: prefix=login')

    for prefix in d_prefix_uid.keys() & d_name_uid.keys():
        if len(d_prefix_uid[prefix]) < THR_MAX and len(
                d_name_uid[prefix]) < THR_MAX:
            _add_cross_clues(clues, d_name_uid[prefix], d_prefix_uid[prefix],
                             PREFIX_NAME)
    log.info('Done: prefix=name')

    for key in d_login_uid.keys() & d_name_uid.keys():
        if len(d_name_uid[key]) < THR_MAX:
            _add_cross_clues(clues, d_name_uid[key], d_login_uid[key],
                             LOGIN_NAME)
    log.info('Done: login=name')

    for name, set_uid in d_name_uid.items():
        if THR_MIN < len(set_uid) < THR_MAX:
            clue = FULL_NAME if ' ' in name else SIMPLE_NAME
            _add_pairwise_clues(clues, set_uid, clue)
    log.info('Done: full/simple name')

    for set_uid in d_domain_uid.values():
        if THR_MIN < len(set_uid) < THR_MAX:
            _add_pairwise_clues(clues, set_uid, DOMAIN)
    log.info('Done: email domain')

    # Visit pairs in ascending order, which merge() relies on.
    for pair in sorted(clues):
        a, b = pair
        list_clues = clues[pair]
        if EMAIL in list_clues:
            merge(a, b, EMAIL)
        elif len(list_clues) >= 2:
            for clue in list_clues:
                merge(a, b, clue)
        elif FULL_NAME in list_clues:
            merge(a, b, FULL_NAME)
        elif COMP_EMAIL_PREFIX in list_clues:
            merge(a, b, COMP_EMAIL_PREFIX)
    log.info('Done: clusters')

    for uid, member_uids in clusters.items():
        members = [aliases[m] for m in member_uids]
        real = [m for m in members if m.record_type == USR_REAL]

        # Count rules that fired
        cl = Counter(labels[uid])
        # A rule that links every member fires at least this many times.
        n_links = len(members) - 1

        is_valid = (
            # If all have the same email there is no doubt
            (cl.get(EMAIL, 0) >= n_links)
            # If all the REALs have the same email, assume all the FAKEs
            # are this REAL
            or (len({m.email for m in real}) == 1)
            # At most one real, at least two rules fired, and each rule
            # applied to each pair
            or (len(real) <= 1 and len(cl) > 1
                and min(cl.values()) >= n_links)
            # At most one real, the only rule that fired is
            # COMP_EMAIL_PREFIX or FULL_NAME
            or (len(real) <= 1 and len(cl) == 1
                and (cl.get(COMP_EMAIL_PREFIX, 0) or cl.get(FULL_NAME, 0)))
            # All with same full name and location / same full name and
            # email domain
            or (cl.get(FULL_NAME, 0) >= n_links
                and (cl.get(LOCATION, 0) >= n_links
                     or cl.get(DOMAIN, 0) >= n_links))
            # All fake and same composite email prefix / same full name
            or (len(real) == 0
                and (cl.get(COMP_EMAIL_PREFIX, 0) >= n_links
                     or cl.get(FULL_NAME, 0) >= n_links))
        )

        if is_valid:
            # Group representative: the member with the smallest uid.
            rep = min(members, key=lambda m: m.uid)

            w_log.writerow([])
            w_log.writerow([str(cl.items())])
            w_log.writerow([rep.uid, rep.name, rep.email])
            for a in members:
                if a.uid != rep.uid:
                    w_log.writerow([a.uid, a.name, a.email])
                    writer.writerow([a.uid, rep.uid])
                    unmask[a.uid] = rep.uid
        else:
            # Split by email address if at least 2 share one
            if cl.get(EMAIL, 0):
                email_counts = Counter([m.email for m in members])
                shared_emails = [e for e, c in email_counts.items() if c > 1]
                for e in shared_emails:
                    extra_members = [m for m in members if m.email == e]
                    rep = min(extra_members, key=lambda m: m.uid)

                    w_log.writerow([])
                    w_log.writerow([rep.uid, rep.name, rep.email])
                    for a in extra_members:
                        if a.uid != rep.uid:
                            w_log.writerow([a.uid, a.name, a.email])
                            writer.writerow([a.uid, rep.uid])
                            unmask[a.uid] = rep.uid

            # Uncertain clusters ("maybes") are also written to the alias
            # map, keyed on the smallest uid of the whole cluster.
            rep = min(members, key=lambda m: m.uid)
            w_maybe.writerow([])
            w_maybe.writerow([str(cl.items())])
            for m in members:
                if m.uid != rep.uid:
                    unmask[m.uid] = rep.uid
                    writer.writerow([m.uid, rep.uid])
                w_maybe.writerow([m.uid, m.name, m.email])

    log.info("Unmasked size: %d", len(unmask))
    # Context manager so the pickle file is flushed and closed promptly.
    with open(os.path.join(out_dir, 'dict', 'aliasMap.dict'), 'wb') as fh:
        pickle.dump(unmask, fh)
Example #23
0
class Compile:
    '''
    Compiler for the DDStorm data files.

    Text files listing differential diagnoses are read from the
    configured library and custom directories and turned into
    simplified ``*.module`` files usable by the program.

    Attributes:
    clean         - False once any source line failed to parse
    priorityRegex - Compiled pattern matching a trailing ".<digits>"
                    priority tag in a (suffix-less) file name
    source        - Set of library *.txt paths (set by compile())
    custom        - Set of custom *.txt paths (set by compile())
    alias         - Alias resolver used for every entry (set by compile())
    '''

    # Characters allowed inside an entry besides letters and digits.
    _ENTRY_EXTRAS = str.maketrans("", "", " -_'")

    def __init__(self, conf=False):
        '''
        The constructor optionally accepts a configuration.
        If none is provided it creates a default configuration.

        Parameters:
        conf - A dictionary-like object containing configuration options
               (must provide "library_path", "custom_path" and
               "module_path" through get())
        '''
        if (conf):
            self._conf = conf
        else:
            self._conf = Conf()
        self.clean = True
        # Raw string: "\." and "\d" are regex escapes, not string escapes.
        self.priorityRegex = re.compile(r"(?<=\.)\d+$")

    def compile(self):
        ''' Compile the text files to DDStorm modules. '''
        # Every run starts clean so is_clean() describes this run only.
        self.clean = True
        self.alias = Alias(self._conf)

        # Collect *.txt files from the library and the custom directories
        self.source = set(self._findTextFiles(self._conf.get("library_path")))
        self.custom = set(self._findTextFiles(self._conf.get("custom_path")))

        # Create module directory if not already present and delete all module files
        modulePath = self._conf.get("module_path")
        if (not os.path.isdir(modulePath)):
            os.makedirs(modulePath)
        for f in os.listdir(modulePath):
            if (fnmatch(f, "*.module")):
                # os.path.join works with or without a trailing separator
                os.unlink(os.path.join(modulePath, f))

        # Library files are compiled first so that custom files can
        # extend or amend the modules they produce.
        for src in self._sortPriority(self.source):
            self._makeModule(src)
        for src in self._sortPriority(self.custom):
            self._makeModule(src)

    def _findTextFiles(self, root):
        ''' Yield the path of every *.txt file found below root. '''
        for path, subdirs, files in os.walk(root):
            for name in files:
                if (fnmatch(name, "*.txt")):
                    yield os.path.join(path, name)

    def _parseName(self, addr):
        '''
        Split a data file path into (module name, priority).

        The name is the lower-cased base name without extension, with
        "_" and "-" replaced by spaces. A trailing ".<digits>" tag is
        removed from the name and returned as the priority; files
        without a tag get the default priority of 100.
        '''
        name = os.path.splitext(os.path.basename(addr))[0].lower().replace(
            "_", " ").replace("-", " ")
        m = re.search(self.priorityRegex, name)
        if (m):
            # The look-behind guarantees a "." right before the digits;
            # cut there so only the trailing tag is dropped.
            return (name[:m.start() - 1], int(m.group()))
        return (name, 100)

    def _isEntry(self, text):
        ''' Tell if text is made only of letters, digits, space, "-", "_" and "'". '''
        return text.translate(self._ENTRY_EXTRAS).isalnum()

    def _sortPriority(self, files):
        '''
        Sort data files based on their priority settings.

        Parameters:
        files - Iterable of data file paths

        Returns the paths ordered by (name, priority, path) in
        descending order: a tuple when files is non-empty, otherwise
        an empty list.
        '''
        ls = []
        for addr in files:
            name, priority = self._parseName(addr)
            ls.append((name, priority, addr))
        # Sort the file list, first by the symptom name, then by the priority number
        ls.sort(reverse=True)
        if (ls):
            return tuple(addr for _, _, addr in ls)
        else:
            return ls

    def _makeModule(self, src):
        '''
        Create application usable modules from data files.

        Entries of src are appended to "<module name>.module" inside
        the configured module directory. In the source file:
          - everything after "#" is a comment,
          - blank lines and lines starting with "!" are ignored,
          - "+entry" puts the entry at the top of the module,
          - "-entry" removes the entry from the module,
          - any other valid line is appended as an entry.
        Invalid lines are logged and mark the compilation as not clean.

        Parameters:
        src - Path of the data file to compile
        '''
        module, _ = self._parseName(src)
        modFile = os.path.join(self._conf.get("module_path"),
                               module + ".module")
        special = []
        with open(src, "r") as sf, open(modFile, "a") as tf:
            for raw in sf:
                # Drop the comment first, then the surrounding blanks, so
                # "entry  # note" does not keep trailing whitespace.
                line = raw.split("#")[0].strip()
                if (not line or line.startswith("!")):
                    continue
                if (line.startswith(("+", "-"))):
                    # Applied once the plain entries have been written
                    special.append(line)
                elif (self._isEntry(line)):
                    print(self.alias.get(line).capitalize(), file=tf)
                else:
                    self._syntaxError(src, line)
        if (special):
            self._applySpecial(src, modFile, special)

    def _applySpecial(self, src, modFile, special):
        ''' Apply the "+entry" / "-entry" lines of src to modFile, in order. '''
        with open(modFile, "r") as fn:
            entries = fn.read().splitlines()
        for line in special:
            body = line[1:].strip()
            if (not self._isEntry(body)):
                self._syntaxError(src, line)
                continue
            entry = self.alias.get(body).capitalize()
            if (line.startswith("+")):
                # Each addition goes to the very top of the module
                entries.insert(0, entry)
            else:
                # Remove whole lines only, never part of a longer entry
                entries = [e for e in entries if e != entry]
        # Single rewrite with exactly one newline per entry
        with open(modFile, "w") as fn:
            fn.writelines(e + "\n" for e in entries)

    def _syntaxError(self, src, line):
        ''' Log an unparsable line of src and flag the compilation as not clean. '''
        self.clean = False
        logging.warning("Syntax error in file '" + src + "': " + line)

    def is_clean(self):
        '''Report if compilation ended successfully'''
        return self.clean