コード例 #1
0
ファイル: encoding_player.py プロジェクト: fpvandoorn/hanabi
    def GetDiscardable(self,r):
        """Return the hand positions of this player that are safe to discard.

        A position is reported only if, for every (suit, number) combination
        that ``self.InformationMatrix`` still allows for it, at least two
        copies of that card remain after subtracting every card named in a
        'play' or 'discard' entry of ``r.playHistory``.

        Args:
            r: object exposing ``playHistory``; each entry's first item is
               'play', 'discard' or 'hint', and for play/discard the second
               item carries the card under the key 'name'.

        Returns:
            list of int: indices in ``range(self.nCards)``.

        Raises:
            NameError: if the history contains an unrecognised action type.
        """
        # Remaining copies of every card in the deck.
        CardCount = {i:0 for i in np.unique(self.SortedDeck)}
        for i in self.SortedDeck:
            CardCount[i] += 1
        for I in r.playHistory:
            if I[0] == 'play' or I[0] == 'discard':
                CardCount[I[1]['name']] -= 1
            elif I[0] != 'hint':
                raise NameError('Unknown action type in play history: %r' % (I[0],))

        # Build the "all suits" index string explicitly: str(range(5))[1:-1]
        # only yields a comma separated list on Python 2.
        AllSuitInd = ','.join(str(k) for k in range(5))
        SuitNames = self.SuitStr.split(',')

        Output = []
        for i in range(self.nCards):
            # An 'x' entry is expanded to every number / every suit index.
            NumMatStr = c(self.InformationMatrix['NumMat'][self.SelfID,i])
            if NumMatStr == 'x': NumMatStr = self.NumStr
            SuitMatStr = c(self.InformationMatrix['SuitMat'][self.SelfID,i])
            if SuitMatStr == 'x': SuitMatStr = AllSuitInd

            SuitList = [SuitNames[int(k)] for k in SuitMatStr.split(',')]
            NumList = [str(int(k)) for k in NumMatStr.split(',')]

            # Card names are number followed by suit.
            if all(CardCount[Num + Suit] >= 2
                   for Suit,Num in it.product(SuitList,NumList)):
                Output.append(i)
        return Output
コード例 #2
0
ファイル: GeneralEncoding.py プロジェクト: robieta/hanabi
    def CardCountInfoMat(self, r, Turn):
        """Use card counting to narrow the information matrix.

        Starts from the counts returned by ``self.GetCountFromDiscard(r, Turn)``,
        subtracts every card whose number and suit are both narrowed to a
        single option, and then removes from each remaining card any number
        or suit that only occurs in combinations whose count is exhausted.

        Args:
            r: passed through to ``GetCountFromDiscard``.
            Turn: passed through to ``GetCountFromDiscard``.

        Returns:
            bool: True if at least one entry of ``self.InformationMatrix``
            was narrowed, for any player.
        """
        CardCount = self.GetCountFromDiscard(r, Turn)

        # Fully determined cards use up one copy each.
        for i in range(self.nPlayers):
            for j in range(self.nCards):
                N = self.InformationMatrix[i, j, 'N']
                S = self.InformationMatrix[i, j, 'S']
                if len(N) == 1 and len(S) == 1:
                    CardCount[N[0] + S[0]] -= 1

        Improvement = False
        for i in range(self.nPlayers):
            for j in range(self.nCards):
                N = self.InformationMatrix[i, j, 'N']
                S = self.InformationMatrix[i, j, 'S']
                if len(N) > 1 or len(S) > 1:
                    # Keep only combinations that still have copies left.
                    PossibleSet = [
                        n + s for n, s in it.product(N, S)
                        if CardCount[n + s] > 0
                    ]
                    Nnew = np.unique([m[0] for m in PossibleSet]).tolist()
                    Snew = np.unique([m[1] for m in PossibleSet]).tolist()
                    if len(Nnew) < len(N) or len(Snew) < len(S):
                        Improvement = True
                        self.InformationMatrix[i, j, 'N'] = c(Nnew)
                        self.InformationMatrix[i, j, 'S'] = c(Snew)

        # Return only after every player has been processed; returning from
        # inside the player loop stopped after the first player.
        return Improvement
コード例 #3
0
ファイル: encoding_player.py プロジェクト: fpvandoorn/hanabi
 def GetPlayableCards(self,Progress):
     """List own hand positions that are certain to be playable.

     A position qualifies when every (suit, number) combination still
     allowed for it by ``self.InformationMatrix`` is the next card of its
     suit according to ``Progress``.

     Args:
         Progress: mapping from suit name to the highest number already
             played for that suit (the next valid play is that number + 1).

     Returns:
         dict: ``{'PlayableInd': [...], 'nPossible': [...]}`` holding, in
         parallel, the playable positions and the number of combinations
         each of them could still be.
     """
     Output = {'PlayableInd':[],'nPossible':[]}
     ValidPlays = {key + str(Progress[key]+1) for key in Progress}

     # Build the "all suits" index string explicitly: str(range(5))[1:-1]
     # only yields a comma separated list on Python 2.
     AllSuitInd = ','.join(str(k) for k in range(5))
     SuitNames = self.SuitStr.split(',')

     for i in range(self.nCards):
         # An 'x' entry is expanded to every suit index / every number.
         SuitInfo = c(self.InformationMatrix['SuitMat'][self.SelfID,i])
         if SuitInfo == 'x': SuitInfo = AllSuitInd
         NumInfo = c(self.InformationMatrix['NumMat'][self.SelfID,i])
         if NumInfo == 'x': NumInfo = c(self.NumStr)

         SuitList = [SuitNames[int(k)] for k in SuitInfo.split(',')]
         NumList = [str(int(k)) for k in NumInfo.split(',')]
         # Card names here are suit followed by number, matching ValidPlays.
         PossibleCards = [Suit + Num
                          for Suit,Num in it.product(SuitList,NumList)]

         if len(set(PossibleCards) & ValidPlays) == len(PossibleCards):
             Output['PlayableInd'].append(i)
             Output['nPossible'].append(len(PossibleCards))
     return Output
コード例 #4
0
ファイル: GeneralEncoding.py プロジェクト: robieta/hanabi
    def CardCountInfoMat(self,r,Turn):
        """Restrict the information matrix using card counting.

        The counts from ``self.GetCountFromDiscard(r,Turn)`` are reduced by
        one for each card that is already pinned to a single number and a
        single suit. Every other card then loses any number or suit that
        appears only in combinations with no copies left.

        Args:
            r: forwarded to ``GetCountFromDiscard``.
            Turn: forwarded to ``GetCountFromDiscard``.

        Returns:
            bool: True when any entry of ``self.InformationMatrix`` became
            smaller, considering all players.
        """
        Improvement = False
        CardCount = self.GetCountFromDiscard(r,Turn)
        AllSlots = [(i,j) for i in range(self.nPlayers)
                    for j in range(self.nCards)]

        # Cards that are fully known consume one copy each.
        for i,j in AllSlots:
            if (len(self.InformationMatrix[i,j,'N']) == 1 and
                    len(self.InformationMatrix[i,j,'S']) == 1):
                CardVal = (self.InformationMatrix[i,j,'N'][0]
                           + self.InformationMatrix[i,j,'S'][0])
                CardCount[CardVal] -= 1

        # Visit every slot of every player before returning; the return used
        # to sit inside the player loop and ended it after the first player.
        for i,j in AllSlots:
            N = self.InformationMatrix[i,j,'N']
            S = self.InformationMatrix[i,j,'S']
            if len(N) > 1 or len(S) > 1:
                PossibleSet = [n + s for n,s in it.product(N,S)
                               if CardCount[n + s] > 0]
                Nnew = np.unique([m[0] for m in PossibleSet]).tolist()
                Snew = np.unique([m[1] for m in PossibleSet]).tolist()
                if len(Nnew) < len(N) or len(Snew) < len(S):
                    Improvement = True
                    self.InformationMatrix[i,j,'N'] = c(Nnew)
                    self.InformationMatrix[i,j,'S'] = c(Snew)

        return Improvement
コード例 #5
0
 def truncate(self, MAX=50):
     """Return a document limited to a random window of MAX sentences.

     Documents with at most MAX sentences are returned unchanged (the same
     object). Otherwise a new instance of the same class is built from the
     flattened tokens of MAX consecutive sentences, with the remaining
     fields passed through ``c(...)``.
     """
     n_sents = len(self.sents)
     if n_sents <= MAX:
         return self

     # Draw the exclusive end of the window, then take the MAX sentences
     # that precede it.
     (stop,) = random.sample(range(MAX, n_sents), 1)
     window_tokens = flatten(self.sents[stop - MAX:stop])
     return self.__class__(
         c(self.raw_text),
         window_tokens,
         c(self.corefs),
         c(self.speakers),
         c(self.genre),
         c(self.filename),
     )
コード例 #6
0
ファイル: Atari (DQN).py プロジェクト: nemo-algorithm/AI
    def update(self, x):  # Confirmed
        """Store a copy of ``x`` in the replay buffer at the write cursor.

        While the buffer holds fewer than ``self.capacity`` items it is first
        grown by one copied element; in every case slot ``self.time`` is then
        overwritten with a fresh copy and the cursor advances, wrapping at
        ``self.capacity``.
        """
        has_room = len(self.replay) < self.capacity
        if has_room:
            self.replay.append(c(x))

        slot = self.time
        self.replay[slot] = c(x)
        self.time = (slot + 1) % self.capacity
コード例 #7
0
ファイル: zeckendorf-arithmetic.py プロジェクト: ncoe/rosetta
 def __sub__(self, rhs):
     """Subtract ``rhs`` from this value and return the result.

     For every set bit of ``rhs.dVal`` within the first
     ``(rhs.dLen + 1) * 2`` bit positions, ``self.c(position)`` is called.
     Afterwards ``dLen`` is decremented while the two-bit group selected by
     ``(dLen * 2) & 31`` is zero or ``dLen`` is zero.

     NOTE(review): the result is ``self`` itself, not a copy, so the left
     operand is modified in place -- confirm callers expect this.
     """
     result = self
     subtrahend_bits = rhs.dVal
     bit_count = (rhs.dLen + 1) * 2
     for position in range(bit_count):
         if ((subtrahend_bits >> position) & 1) == 1:
             result.c(position)

     while True:
         top_group = (result.dVal >> ((result.dLen * 2) & 31)) & 3
         if top_group != 0 and result.dLen != 0:
             break
         result.dLen -= 1
     return result
コード例 #8
0
 def truncate(self, MAX=50):
     """ Randomly truncate the document to up to MAX sentences

     Sentence boundaries are the positions of '.', '?' and '!' tokens. When
     the document has more than MAX of them, a new instance of the same
     class is returned whose tokens run from one boundary to the boundary
     MAX sentences later; otherwise the document itself is returned.

     Args:
         MAX: maximum number of sentences to keep.

     Returns:
         ``self`` when no truncation is needed, else a new instance built
         from the token window and copies of corefs, speakers and genre.
     """
     sentences = [
         idx for idx, token in enumerate(self.tokens)
         if token in ('.', '?', '!')
     ]
     if len(sentences) <= MAX:
         return self

     i = random.sample(range(MAX, len(sentences)), 1)[0]
     # The window start must follow MAX; a literal 50 here gave the wrong
     # window (or an IndexError) for any other MAX.
     tokens = self.tokens[sentences[i - MAX]:sentences[i]]
     return self.__class__(tokens, c(self.corefs), c(self.speakers),
                           c(self.genre))
コード例 #9
0
def test_initializers_with_pattern():
    """Check that word-embedding weights are identical before and after
    the Trainer is built with ``initializers_2``.

    A copy of the model taken before the Trainer is created serves as the
    reference for the comparison.
    """
    model = WideDeep(
        wide=Wide(100, 1),
        deeptabular=TabMlp(
            mlp_hidden_dims=[32, 16],
            mlp_dropout=[0.5, 0.5],
            column_idx=column_idx,
            embed_input=embed_input,
            continuous_cols=colnames[-5:],
        ),
        deeptext=DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0),
        pred_dim=1,
    )

    # Reference parameters, captured before any initializer runs.
    reference = c(model)
    org_word_embed = [
        param for name, param in reference.named_parameters()
        if "word_embed" in name
    ]

    trainer = Trainer(
        model,
        objective="binary",
        verbose=0,
        initializers=initializers_2,
    )
    init_word_embed = [
        param for name, param in trainer.model.named_parameters()
        if "word_embed" in name
    ]

    assert torch.all(org_word_embed[0] == init_word_embed[0].cpu())
コード例 #10
0
def test_initializers_with_pattern():
    """Check that word-embedding weights are identical before and after
    ``model.compile`` is called with ``initializers_2``.

    A copy of the model taken before ``compile`` serves as the reference.
    """
    model = WideDeep(
        wide=Wide(100, 1),
        deepdense=DeepDense(
            hidden_layers=[32, 16],
            dropout=[0.5, 0.5],
            deep_column_idx=deep_column_idx,
            embed_input=embed_input,
            continuous_cols=colnames[-5:],
        ),
        deeptext=DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0),
        pred_dim=1,
    )

    # Reference parameters, captured before any initializer runs.
    reference = c(model)
    org_word_embed = [
        param for name, param in reference.named_parameters()
        if "word_embed" in name
    ]

    model.compile(method="binary", verbose=0, initializers=initializers_2)
    init_word_embed = [
        param for name, param in model.named_parameters()
        if "word_embed" in name
    ]

    assert torch.all(org_word_embed[0] == init_word_embed[0].cpu())
コード例 #11
0
ファイル: GeneralEncoding.py プロジェクト: robieta/hanabi
 def DetermineHint(self,Code,HintingPlayer,Turn):
     """Pick the hint that encodes the other players' hands under ``Code``.

     Takes a selected code, looks at the other players' hands, and
     determines what hint to give to provide the information corresponding
     to the selected code.

     Args:
         Code: code understood by ``self.ExpandCode``.
         HintingPlayer: index of the player giving the hint.
         Turn: key into ``self.HandHistory`` for the hands to encode.

     Returns:
         tuple: ``(player_id, number_or_suit)`` taken from the product of
         the non-hinting players with ``self.NumberSet + self.SuitSet``.

     Raises:
         IndexError: if a card value is in none of the evaluation sets, or
             the computed result is not among the possible results.
     """
     OtherIDs = [m for m in range(self.nPlayers) if m != HintingPlayer]
     (CodeList,TypeList,ColList,GroupSetList,EvalSetList,EncodeBase,
          PossibleResultList) = self.ExpandCode(Code)

     ActualResult = []
     for i,_ in enumerate(CodeList):
         IsNumber = TypeList[i] == 'N'
         Columns = [int(m) for m in ColList[i].split(',')]
         PositionInSetList = []
         for j,Col in enumerate(Columns):
             # Character 0 of a card is its number, character 1 its suit.
             RawVal = self.HandHistory[Turn][OtherIDs[j],Col][0 if IsNumber else 1]
             if IsNumber:
                 RawVal = int(RawVal)
             # Index of the first evaluation set that contains the value.
             PositionInSetList.append(
                 [m for m,M in enumerate(EvalSetList[i]) if RawVal in M][0])
         ActualResult.append(
             np.sum(PositionInSetList) % len(EvalSetList[i]))

     ResultSelection = [m for m,M in enumerate(PossibleResultList)
                        if np.array_equal(M,ActualResult)][0]

     # Copy before extending so self.NumberSet is left untouched.
     NumSuitSet = c(self.NumberSet)
     NumSuitSet.extend(self.SuitSet)
     return list(it.product(OtherIDs,NumSuitSet))[ResultSelection]
コード例 #12
0
 def find_all_paths_alpha(graph2,
                          start,
                          end,
                          _threshold,
                          weight=0,
                          path=None):
     """Enumerate simple paths from ``start`` to ``end`` within a weight budget.

     Args:
         graph2: mapping ``node -> {neighbour: edge_weight}``.
         start: node to start from.
         end: node to reach.
         _threshold: maximum accumulated weight a path may have.
         weight: weight of the edge that led to ``start`` (0 for the root).
         path: ``[nodes_so_far, weight_so_far]`` accumulator. Leave it as
             None for a fresh search; a list passed explicitly is updated
             in place.

     Returns:
         list: one ``[node_list, total_weight]`` entry per path found; empty
         if ``start`` has no entry in ``graph2`` or nothing fits the budget.
     """
     # A literal list default would be shared and mutated across calls, so
     # every later search would start from the previous one's state.
     if path is None:
         path = [[], 0]
     path[0], path[1] = path[0] + [start], path[1] + weight
     if start == end:
         return [path]
     if start not in graph2:
         return []
     paths = []
     for node, w in graph2[start].items():
         # Skip nodes already on the path and edges that exceed the budget.
         if node not in path[0] and path[1] + w <= _threshold:
             paths.extend(
                 find_all_paths_alpha(graph2, node, end, _threshold, w,
                                      c(path)))
     return paths
コード例 #13
0
ファイル: encoding_player.py プロジェクト: fpvandoorn/hanabi
    def InfoMatHumanReadable(self):
        """Print the information matrix as aligned, human-readable tables.

        Works on a copy of ``self.InformationMatrix``. In the 'SuitMat'
        table every entry other than 'x' is replaced by the concatenation of
        ``self.suits[index]`` for its comma separated indices. Each table is
        printed under its key, one line per row, every cell padded to the
        longest entry and followed by ' |' and four spaces; 'x' entries are
        shown as blank cells.
        """
        print('')
        InfoMatPrint = c(self.InformationMatrix)
        MaxLen = 0
        for key in InfoMatPrint:
            for j,J in enumerate(self.InformationMatrix[key]):
                for k,K in enumerate(J):
                    if key == 'SuitMat' and K != 'x':
                        InfoMatPrint[key][j,k] = ''.join(
                            self.suits[int(l)] for l in K.split(','))
                        # Read the value back: the array is what gets printed.
                        K = InfoMatPrint[key][j,k]
                    MaxLen = max(MaxLen,len(K))

        Separator = ' |' + ' '*4
        for key in InfoMatPrint:
            print(key)
            for J in InfoMatPrint[key]:
                # Emit the whole row with one print call; a trailing comma
                # inside print(...) does not suppress the newline, so the
                # cells used to end up on separate lines.
                print(''.join(('' if K == 'x' else K).ljust(MaxLen) + Separator
                              for K in J))
            print('')
コード例 #14
0
ファイル: GeneralEncoding.py プロジェクト: robieta/hanabi
    def DetermineHint(self, Code, HintingPlayer, Turn):
        """Work out which hint conveys the information selected by ``Code``.

        Takes a selected code, looks at the other players' hands, and
        determines what hint to give to provide the information corresponding
        to the selected code.

        Args:
            Code: code understood by ``self.ExpandCode``.
            HintingPlayer: index of the player giving the hint.
            Turn: key into ``self.HandHistory`` for the hands to encode.

        Returns:
            tuple: ``(player_id, number_or_suit)`` taken from the product of
            the non-hinting players with ``self.NumberSet + self.SuitSet``.

        Raises:
            IndexError: if a card value is in none of the evaluation sets,
                or the computed result is not among the possible results.
        """
        OtherIDs = [m for m in range(self.nPlayers) if m != HintingPlayer]
        (CodeList, TypeList, ColList, GroupSetList, EvalSetList, EncodeBase,
         PossibleResultList) = self.ExpandCode(Code)

        ActualResult = []
        for i, _ in enumerate(CodeList):
            EvalSets = EvalSetList[i]
            IsNumber = TypeList[i] == 'N'
            Columns = [int(m) for m in ColList[i].split(',')]
            PositionInSetList = []
            for j, Col in enumerate(Columns):
                Card = self.HandHistory[Turn][OtherIDs[j], Col]
                # Character 0 of a card is its number, character 1 its suit.
                RawVal = int(Card[0]) if IsNumber else Card[1]
                # Index of the first evaluation set that contains the value.
                PositionInSetList.append(
                    [m for m, M in enumerate(EvalSets) if RawVal in M][0])
            ActualResult.append(np.sum(PositionInSetList) % len(EvalSets))

        ResultSelection = [
            m for m, M in enumerate(PossibleResultList)
            if np.array_equal(M, ActualResult)
        ][0]

        # Copy before extending so self.NumberSet is left untouched.
        NumSuitSet = c(self.NumberSet)
        NumSuitSet.extend(self.SuitSet)
        return list(it.product(OtherIDs, NumSuitSet))[ResultSelection]
コード例 #15
0
ファイル: encoding_player.py プロジェクト: fpvandoorn/hanabi
 def CodeParse(self,CodeIn,DenseOtherHands):
     """Split one code segment into its position, value map and hand matrix.

     ``CodeIn`` has underscore separated fields: a header whose first
     character is a position digit and whose second is 'N' or 'S', an
     encoding name ('all', '1,2+' or '1-4+'), and, when the position digit
     is not a valid column of ``DenseOtherHands['NumMat']``, a third field
     of ':' separated 'a,b' address pairs.

     Args:
         CodeIn: the code segment string.
         DenseOtherHands: dict with 'NumMat' and 'SuitMat' arrays.

     Returns:
         tuple: ``(Position, Map, OtherHandMat, MatLabel, CustomPosition)``.
         ``Position`` is an int column or the string 'custom';
         ``Map`` is ``[value_groups, codes]``; ``OtherHandMat`` is a copy of
         the selected matrix; ``CustomPosition`` is a list of int pairs or
         '' when the position is not custom.

     Raises:
         NameError: for a numeric-only encoding used with 'S', or for an
             unknown encoding name.
         KeyError: if the type character is neither 'N' nor 'S'.
     """
     Fields = CodeIn.split('_')
     Header = Fields[0]

     Position = int(Header[0])
     if Position >= DenseOtherHands['NumMat'].shape[1]:
         # The digit is not a real column: addresses come from field 3.
         Position = 'custom'
         CustomPosition = [[int(j) for j in i.split(',')]
                           for i in Fields[2].split(':')]
     else:
         CustomPosition = ''

     TypeChar = Header[1]
     MatLabel = {'N':'NumMat','S':'SuitMat'}[TypeChar]
     OtherHandMat = c(DenseOtherHands[MatLabel])

     Encoding = Fields[1]
     if Encoding == 'all':
         if TypeChar == 'N':
             Map = [[[1],[2],[3],[4],[5]],range(5)]
         else:
             # The MatLabel lookup above only lets 'N' or 'S' through.
             Map = [[[0],[1],[2],[3],[4]],range(5)]
     elif Encoding == '1,2+':
         if TypeChar != 'N':
             raise NameError('1,2+ valid only for numeric case')
         Map = [[[1],[2,3,4,5]],[1,0]]
     elif Encoding == '1-4+':
         if TypeChar != 'N':
             raise NameError('1-4+ valid only for numeric case')
         Map = [[[1],[2],[3],[4,5]],[0,1,2,3]]
     else:
         # Fail with a clear message instead of an UnboundLocalError on Map.
         raise NameError('Unknown encoding %r in code %r' % (Encoding,CodeIn))

     return Position,Map,OtherHandMat,MatLabel,CustomPosition
コード例 #16
0
def test_single_initializer(model, initializer):
    """Check that building a Trainer with ``initializer`` changes the
    weights of the wide linear layer.

    The Trainer receives a copy of ``model``, and its wide weights are
    compared with those of the model passed in.
    """
    weights_before = model.wide.wide_linear.weight.data.detach().cpu()

    trainer = Trainer(c(model), objective="binary", initializers=initializer)
    weights_after = trainer.model.wide.wide_linear.weight.data.detach().cpu()

    unchanged = torch.all(weights_before == weights_after)
    assert not unchanged
コード例 #17
0
ファイル: encoding_player.py プロジェクト: fpvandoorn/hanabi
    def ValueFromCode(self,Code,DenseOtherHands,EncodedValue,Player):
        """Decode ``EncodedValue`` under ``Code`` and update the information matrix.

        ``Code`` is split on '__' into segments, each parsed with
        ``self.CodeParse``. The size of each segment's ``Map[1]`` gives one
        digit base; ``self.EnumerateMixedBase`` turns these bases into a table
        that ``EncodedValue`` indexes to get one result per segment. For each
        segment the contribution of the other hands is subtracted modulo the
        segment's base and the remainder is mapped back to a set of values,
        which is written into (or intersected with) the matching entry of
        ``self.InformationMatrix`` for ``Player``.

        Args:
            Code: '__' separated code segments.
            DenseOtherHands: forwarded to ``self.CodeParse``.
            EncodedValue: index into the enumerated mixed-base table.
            Player: player whose knowledge is being updated.

        Returns:
            None. ``self.InformationMatrix`` is modified in place.

        Raises:
            NameError: if a column does not hold ``self.nPlayers - 2`` known
                values, if a custom segment has more than one address for
                ``Player``, or if the intersection of old and new knowledge
                is empty.
        """
        # First pass: collect the base (number of codes) of every segment.
        MixBaseList = []
        for i,I in enumerate(Code.split('__')):
            Position,Map,OtherHandMat,MatLabel,CustomPosition = self.CodeParse(I,DenseOtherHands)
            MixBaseList.append(len(Map[1]))
            
        # Per-segment results selected by the encoded value.
        MixBaseEnum = self.EnumerateMixedBase(MixBaseList)
        ResultList = MixBaseEnum[EncodedValue]
        
        # Second pass: back-calculate this player's value for every segment.
        BackCalcList = []
        for i,I in enumerate(Code.split('__')):
            AdjustInfoMatBool = True
            Position,Map,OtherHandMat,MatLabel,CustomPosition = self.CodeParse(I,DenseOtherHands)
            if Position != 'custom':
                # Column segment: use every non-'x' entry of that column.
                OtherValues = [int(j) for j in OtherHandMat[:,Position] if j != 'x']
                if len(OtherValues) != self.nPlayers - 2:
                    raise NameError('Incorrect number of elements')

            else:
                # Custom segment: separate the addresses whose first element
                # is Player from all the others.
                PlayerAddress = []
                OtherPlayerAddress = []
                for j,J in enumerate(CustomPosition):
                    if J[0] == Player:
                        PlayerAddress.append(J)
                    else:
                        OtherPlayerAddress.append(J)
                
                if len(PlayerAddress) > 1:
                    raise NameError('Multiple unknowns for same player not supported')
                elif len(PlayerAddress) == 1:
                    PlayerAddress = PlayerAddress[0]
                    OtherValues = [OtherHandMat[j[0],j[1]] for j in OtherPlayerAddress]
                else:
                    # No address belongs to Player: nothing to update.
                    AdjustInfoMatBool = False
                  
            if AdjustInfoMatBool:
                # Remove the other hands' contribution modulo the segment base.
                MappedOtherValues = self.MapVector(OtherValues,Map)
                ResultInd = (ResultList[i] - np.sum(MappedOtherValues)) % len(Map[1])
                BackCalcList.append(self.InverseMapVector([ResultInd],Map)[0])
                
                if Position != 'custom':  
                    PlayerAddress = [Player,Position]
    
                CurrentKnowledge = c(self.InformationMatrix[MatLabel][PlayerAddress[0],PlayerAddress[1]])
                
                # Strip the first and last character of the str() form
                # (presumably the brackets of a list -- verify against
                # InverseMapVector's return type).
                AddedKnowledge = str(BackCalcList[-1])[1:-1]
                if CurrentKnowledge == 'x':
                    # 'x' entry: store the new knowledge as is.
                    self.InformationMatrix[MatLabel][PlayerAddress[0],PlayerAddress[1]] = c(AddedKnowledge)
                elif len(CurrentKnowledge) == 1:
                    # A one-character entry is left unchanged.
                    pass
                else:
                    # Otherwise keep only values present in both old and new.
                    CurrentSet =  [int(j) for j in AddedKnowledge.split(',')]
                    IntSet = [int(j) for j in CurrentKnowledge.split(',')]
                    NewSet = list(set(CurrentSet).intersection(IntSet))
                    if len(NewSet) == 0:
                        raise NameError('Error: Possibility has been reduced to empty set')
                    self.InformationMatrix[MatLabel][PlayerAddress[0],PlayerAddress[1]] = c(str(NewSet)[1:-1])    
コード例 #18
0
ファイル: aaa3.py プロジェクト: dossa328/MaxPre3
 def find_all_paths(graph2, start, end, weight=0, path=None):
     """Enumerate simple paths from ``start`` to ``end`` within the weight limit.

     The limit is read from the module-level name ``threshold``.

     Args:
         graph2: mapping ``node -> {neighbour: edge_weight}``.
         start: node to start from.
         end: node to reach.
         weight: weight of the edge that led to ``start`` (0 for the root).
         path: ``[nodes_so_far, weight_so_far]`` accumulator. Leave it as
             None for a fresh search; a list passed explicitly is updated
             in place.

     Returns:
         list: one ``[node_list, total_weight]`` entry per path found; empty
         if ``start`` has no entry in ``graph2`` or nothing fits the limit.
     """
     # A literal list default would be shared and mutated across calls, so
     # every later search would start from the previous one's state.
     if path is None:
         path = [[], 0]
     path[0], path[1] = path[0] + [start], path[1] + weight
     if start == end:
         return [path]
     # A node without an adjacency entry is a dead end, not an error.
     if start not in graph2:
         return []
     paths = []
     for node, w in graph2[start].items():
         if node not in path[0] and path[1] + w <= threshold:
             paths.extend(find_all_paths(graph2, node, end, w, c(path)))
     return paths
コード例 #19
0
ファイル: GeneralEncoding.py プロジェクト: robieta/hanabi
 def BackCalcHintedState(self,Hint,Code,HintingPlayer):
     """Translate a concrete hint back into the result vector it encodes.

     Converts the actual hint (i.e. player 3 green) into the intended
     vector of numbers (i.e. [2,0,0]).

     Args:
         Hint: ``(player_id, number_or_suit)`` pair that was given.
         Code: code understood by ``self.ExpandCode``.
         HintingPlayer: index of the player who gave the hint.

     Returns:
         The entry of ``PossibleResultList`` (from ``ExpandCode``) at the
         position ``Hint`` occupies in the product of the non-hinting
         players with ``self.NumberSet + self.SuitSet``.

     Raises:
         IndexError: if ``Hint`` is not one of the possible hints.
     """
     OtherIDs = [m for m in range(self.nPlayers) if m != HintingPlayer]
     (CodeList,TypeList,ColList,GroupSetList,EvalSetList,EncodeBase,
          PossibleResultList) = self.ExpandCode(Code)

     # Copy before extending so self.NumberSet is left untouched.
     NumSuitSet = c(self.NumberSet)
     NumSuitSet.extend(self.SuitSet)

     Matches = [m for m,M in enumerate(it.product(OtherIDs,NumSuitSet))
                if np.array_equal(M,Hint)]
     return PossibleResultList[Matches[0]]
コード例 #20
0
 def BackCalcHintedState(self,Hint,Code,HintingPlayer):
     """Recover the encoded result vector from the hint that was given.

     Converts the actual hint (i.e. player 3 green) into the intended
     vector of numbers (i.e. [2,0,0]).

     Args:
         Hint: ``(player_id, number_or_suit)`` pair that was given.
         Code: code understood by ``self.ExpandCode``.
         HintingPlayer: index of the player who gave the hint.

     Returns:
         The element of ``PossibleResultList`` (last item returned by
         ``ExpandCode``) whose index equals the index of ``Hint`` among all
         (other player, number-or-suit) pairs.

     Raises:
         IndexError: if ``Hint`` matches none of the possible hints.
     """
     (CodeList,TypeList,ColList,GroupSetList,EvalSetList,EncodeBase,
          PossibleResultList) = self.ExpandCode(Code)
     OtherIDs = [m for m in range(self.nPlayers) if m != HintingPlayer]

     # Work on a copy so that self.NumberSet does not grow on every call.
     NumSuitSet = c(self.NumberSet)
     NumSuitSet.extend(self.SuitSet)

     AllHints = it.product(OtherIDs,NumSuitSet)
     ResultSelection = [m for m,M in enumerate(AllHints)
                        if np.array_equal(M,Hint)][0]
     return PossibleResultList[ResultSelection]
コード例 #21
0
    def dfs(self, start, end, vertex):
        """Depth-first step that records the current stack when ``end`` is reached.

        Uses the free names ``visit``, ``stack``, ``all_path``,
        ``candidate_path`` and ``graph``, none of which are defined in this
        method -- presumably module-level state; verify where they live.

        Args:
            start: node being visited.
            end: target node.
            vertex: iterable of candidate neighbour nodes.

        Returns:
            ``candidate_path`` when ``start == end``; otherwise None
            (the method falls off the end without a return value).
        """
        visit.append(start)
        stack.append(start)
        if start == end:
            # Store a copy of the current stack under the end node, then
            # append every stored path to the candidate list.
            all_path[start] = c(stack)
            candidate_path.extend((all_path.values()))
            stack.pop()
            return candidate_path
        # Note: the loop variable is named ``it``, which would shadow any
        # outer name ``it`` inside this method.
        for it in vertex:
            if graph.is_adjacent(start, it):
                if it not in visit:
                    # NOTE(review): this calls a bare ``dfs`` with two
                    # arguments, while this method takes (self, start, end,
                    # vertex) -- confirm which function is meant here.
                    dfs(it, end)
                    # Un-mark the neighbour so that it can be used again.
                    visit.remove(it)

        stack.pop()
コード例 #22
0
ファイル: straight_forward.py プロジェクト: dossa328/MaxPre3
 def find_all_paths(graph2, start, end, _threshold, weight=0,  path=None):
     """Enumerate simple paths from ``start`` to ``end`` within a weight budget.

     Args:
         graph2: mapping ``node -> {neighbour: edge_weight}``.
         start: node to start from.
         end: node to reach.
         _threshold: maximum accumulated weight a path may have.
         weight: weight of the edge that led to ``start`` (0 for the root).
         path: ``[nodes_so_far, weight_so_far]`` accumulator. Leave it as
             None for a fresh search; a list passed explicitly is updated
             in place.

     Returns:
         list: one ``[node_list, total_weight]`` entry per path found; empty
         if ``start`` has no entry in ``graph2`` or nothing fits the budget.
     """
     # A literal list default would be shared and mutated across calls, so
     # every later search would start from the previous one's state.
     if path is None:
         path = [[], 0]
     path[0], path[1] = path[0] + [start], path[1] + weight
     if start == end:
         return [path]
     # A node without an adjacency entry is a dead end, not an error.
     if start not in graph2:
         return []
     paths = []
     for node, w in graph2[start].items():
         # Skip nodes already on the path and edges that exceed the budget.
         if node not in path[0] and path[1] + w <= _threshold:
             paths.extend(
                 find_all_paths(graph2, node, end, _threshold, w, c(path)))
     return paths
コード例 #23
0
ファイル: GeneralEncoding.py プロジェクト: robieta/hanabi
 def UpdateInformationMatrix(self, Hint, Code, HintingPlayer, Turn):
     """Apply the information carried by a hint to ``self.InformationMatrix``.

     Args:
         Hint: the hint that was given; forwarded to
             ``self.BackCalcHintedState``.
         Code: code understood by ``self.ExpandCode``.
         HintingPlayer: index of the player who gave the hint.
         Turn: key into ``self.HandHistory`` for the hands that were encoded.

     Returns:
         None. For every code entry and every non-hinting player, the
         matching ``self.InformationMatrix`` entry is replaced by its
         intersection with the set that was encoded for that player.
     """
     # This is the function which performs the modular arithmetic back
     # calculation to convert a code and hint into the underlying encoded
     # information and transfers it into the information matrix.
     ActualResult = self.BackCalcHintedState(Hint, Code, HintingPlayer)
     (CodeList, TypeList, ColList, GroupSetList, EvalSetList, EncodeBase,
      PossibleResultList) = self.ExpandCode(Code)
     # Everyone except the hinter, and the same list without this player.
     NonHintingIDs = [m for m in range(self.nPlayers) if m != HintingPlayer]
     OtherNonHintingIDs = [m for m in NonHintingIDs if m != self.SelfID]
     # Positions inside NonHintingIDs that belong to the other players.
     CodePosOtherNonHinting = [
         m for m, M in enumerate(NonHintingIDs) if M != self.SelfID
     ]
     for i, I in enumerate(CodeList):
         # One column per non-hinting player, in NonHintingIDs order.
         CurrentColList = [int(m) for m in ColList[i].split(',')]
         CurrentOtherColList = [
             CurrentColList[m] for m in CodePosOtherNonHinting
         ]
         OtherHandVals = []
         for j, J in enumerate(OtherNonHintingIDs):
             # Within this loop "Val" refers to the index of the set which
             # the card is known to belong
             HandValue = self.HandHistory[Turn][J, CurrentOtherColList[j]]
             # Suit codes use the last character of the card; number codes
             # use everything before it, converted to int.
             if TypeList[i] == 'S':
                 HandValue = HandValue[-1]
             else:
                 HandValue = int(HandValue[:-1])
             OtherHandVals.append([
                 m for m, M in enumerate(EvalSetList[i]) if HandValue in M
             ][0])
         # SelfVal is only needed (and only defined) when this player is
         # among the non-hinting players.
         if self.SelfID != HintingPlayer:
             SelfVal = int((ActualResult[i] - np.sum(OtherHandVals)) %
                           len(EvalSetList[i]))
         # Reverse a copy so that pop() hands the values back in their
         # original order while walking NonHintingIDs.
         OtherHandValsRev = c(OtherHandVals)
         OtherHandValsRev.reverse()
         NonHintingVals = ([
             OtherHandValsRev.pop() if M != self.SelfID else SelfVal
             for M in NonHintingIDs
         ])
         for j, J in enumerate(NonHintingIDs):
             # Members of the selected set, as strings, are intersected with
             # what is currently stored for that card.
             RestrictedSet = EvalSetList[i][NonHintingVals[j]]
             RestrictedSet = [str(m) for m in RestrictedSet]
             self.InformationMatrix[J, CurrentColList[j], TypeList[i]] = (
                 list(
                     set(self.InformationMatrix[J, CurrentColList[j],
                                                TypeList[i]]).intersection(
                                                    RestrictedSet)))
コード例 #24
0
		def _NEWTON(self,guess, conv, omega=1.0):
			"""Iterate Newton updates on ``guess`` until the residual is below ``conv``.

			Each pass builds the Jacobian as a copy of ``self.DD`` with
			``self.dForcing(guess)`` subtracted from its diagonal, evaluates
			the residual ``F = DD.guess - bcvector - _Forcing(guess)`` and
			updates ``guess`` in place by ``omega * inv(Jacobian).F``.

			Args:
				guess: starting vector; modified in place.
				conv: the loop stops once ``max(abs(F))`` is below this value.
				omega: factor applied to each update step.

			Returns:
				``guess`` after the last update, or None when ``self.zth`` or
				``self.nth`` is falsy (nothing is computed in that case).

			NOTE(review): the loop nesting was reconstructed from a
			whitespace-mangled source in which the ``while`` body was not
			indented -- confirm against the original file.
			"""
			if self.zth and self.nth:

				while True:

					Jacobian, dF = c(self.DD), self.dForcing(guess)

					for i in range(self.size):
						Jacobian[i][i] -= dF[i]

					F = np.dot(self.DD, guess) - self.bcvector - self._Forcing(guess)
					guess -= omega * np.dot(la.inv(Jacobian), F)

					#plt.plot(omega* np.dot(la.inv(Jacobian),F) )
					#print( max(abs(F)) )

					# F was evaluated before the update, so a pass returns
					# once the residual of the previous iterate is small.
					if max(abs(F)) < conv:
						return guess
コード例 #25
0
def test_initializers_1():
	"""Check that ``initializers_1`` changes every element of the parameters
	named in ``test_layers_1``.

	A copy of the model taken before ``compile`` serves as the reference.
	"""
	wide = Wide(100, 1)
	deepdense = DeepDense(
		hidden_layers=[32,16],
		dropout=[0.5, 0.5],
		deep_column_idx=deep_column_idx,
		embed_input=embed_input,
		continuous_cols=colnames[-5:],
	)
	deeptext = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)
	deepimage = DeepImage(pretrained=True)
	model = WideDeep(
		wide=wide,
		deepdense=deepdense,
		deeptext=deeptext,
		deepimage=deepimage,
		output_dim=1,
	)

	# Reference parameters, captured before any initializer runs.
	reference = c(model)
	org_weights = [p for n,p in reference.named_parameters() if n in test_layers_1]

	model.compile(method='binary', verbose=0, initializers=initializers_1)
	init_weights = [p for n,p in model.named_parameters() if n in test_layers_1]

	# Every element of every selected parameter must differ.
	res = all(torch.all(a != b) for a,b in zip(org_weights, init_weights))
	assert res
コード例 #26
0
def test_initializers_1(initializers, test_layers):
    """Check that ``initializers`` changes every element of the parameters
    named in ``test_layers``.

    A copy of the model taken before the Trainer is built serves as the
    reference.
    """
    wide = Wide(np.unique(X_wide).shape[0], 1)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        mlp_dropout=[0.5, 0.5],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    deeptext = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)
    deepimage = DeepImage(pretrained=True)
    model = WideDeep(wide=wide, deeptabular=deeptabular, deeptext=deeptext,
                     deepimage=deepimage, pred_dim=1)

    # Reference parameters, captured before any initializer runs.
    reference = c(model)
    org_weights = [
        param for name, param in reference.named_parameters()
        if name in test_layers
    ]

    trainer = Trainer(model, objective="binary", verbose=0,
                      initializers=initializers)
    init_weights = [
        param for name, param in trainer.model.named_parameters()
        if name in test_layers
    ]

    # Every element of every selected parameter must differ.
    res = all(
        torch.all(before != after)
        for before, after in zip(org_weights, init_weights)
    )
    assert res
コード例 #27
0
ファイル: GeneralEncoding.py プロジェクト: robieta/hanabi
 def UpdateInformationMatrix(self,Hint,Code,HintingPlayer,Turn):
     """Apply the information carried by a hint to ``self.InformationMatrix``.

     Args:
         Hint: the hint that was given; forwarded to
             ``self.BackCalcHintedState``.
         Code: code understood by ``self.ExpandCode``.
         HintingPlayer: index of the player who gave the hint.
         Turn: key into ``self.HandHistory`` for the hands that were encoded.

     Returns:
         None. For every code entry and every non-hinting player, the
         matching ``self.InformationMatrix`` entry is replaced by its
         intersection with the set that was encoded for that player.
     """
     # This is the function which performs the modular arithmetic back
     # calculation to convert a code and hint into the underlying encoded
     # information and transfers it into the information matrix.
     ActualResult = self.BackCalcHintedState(Hint,Code,HintingPlayer)
     (CodeList,TypeList,ColList,GroupSetList,EvalSetList,EncodeBase,
          PossibleResultList) = self.ExpandCode(Code)
     # Everyone except the hinter, and the same list without this player.
     NonHintingIDs = [m for m in range(self.nPlayers) if m != HintingPlayer]
     OtherNonHintingIDs = [m for m in NonHintingIDs if m != self.SelfID]
     # Positions inside NonHintingIDs that belong to the other players.
     CodePosOtherNonHinting = [m for m,M in enumerate(NonHintingIDs) if M != self.SelfID]
     for i,I in enumerate(CodeList):
         # One column per non-hinting player, in NonHintingIDs order.
         CurrentColList = [int(m) for m in ColList[i].split(',')]
         CurrentOtherColList = [CurrentColList[m] for m in CodePosOtherNonHinting]
         OtherHandVals = []
         for j,J in enumerate(OtherNonHintingIDs):
             # Within this loop "Val" refers to the index of the set which
             # the card is known to belong
             HandValue = self.HandHistory[Turn][J,CurrentOtherColList[j]]
             # Suit codes use the last character of the card; number codes
             # use everything before it, converted to int.
             if TypeList[i] == 'S':
                 HandValue = HandValue[-1]
             else:
                 HandValue = int(HandValue[:-1])
             OtherHandVals.append(
              [m for m,M in enumerate(EvalSetList[i]) if HandValue in M][0])
         # SelfVal is only needed (and only defined) when this player is
         # among the non-hinting players.
         if self.SelfID != HintingPlayer:
             SelfVal = int((ActualResult[i] - np.sum(OtherHandVals)) 
                         % len(EvalSetList[i]))
         # Reverse a copy so that pop() hands the values back in their
         # original order while walking NonHintingIDs.
         OtherHandValsRev = c(OtherHandVals)
         OtherHandValsRev.reverse()
         NonHintingVals = ([OtherHandValsRev.pop() if M != self.SelfID 
                             else SelfVal for M in NonHintingIDs])
         for j,J in enumerate(NonHintingIDs):
             # Members of the selected set, as strings, are intersected with
             # what is currently stored for that card.
             RestrictedSet = EvalSetList[i][NonHintingVals[j]]
             RestrictedSet = [str(m) for m in RestrictedSet]
             self.InformationMatrix[J,CurrentColList[j],TypeList[i]] = (
                 list(set(self.InformationMatrix[J,CurrentColList[j],
                 TypeList[i]]).intersection(RestrictedSet)))
コード例 #28
0
def eval(param):
    """Run the test-set evaluation of a conditioned-separation model.

    Args:
        param: either a dict of options or an object whose ``vars()`` yields
            that dict (the mapping is modified in place).

    Returns:
        None. The function builds the model, logger, trainer and test data
        loader, loads the checkpoint and calls ``trainer.test``.

    Raises:
        NotImplementedError: when ``log`` is ``'tensorboard'``.
    """
    options = param if isinstance(param, dict) else vars(param)

    # The literal string 'None' is treated as a missing value.
    for name, value in list(options.items()):
        if value == 'None':
            options[name] = None

    gpu_index = options['gpu_index']
    if gpu_index is not None:
        options['gpus'] = str(gpu_index)

    # ---- model ---------------------------------------------------------
    framework = get_class_by_name('conditioned_separation', options['model'])
    if options['spec_type'] != 'magnitude':
        options['input_channels'] = 4
    from copy import deepcopy
    # The model gets its own deep copy so later edits to `options`
    # (logger objects, gpu settings) do not leak into it.
    model = framework(**deepcopy(options))

    # ---- checkpoint location --------------------------------------------
    run_dir = Path(options['ckpt_root_path']).joinpath(
        options['model'], options['run_id'])
    ckpt_path = '{}/{}'.format(str(run_dir), options['epoch'])

    # ---- logger ---------------------------------------------------------
    log_mode = options['log']
    if log_mode == 'tensorboard':
        raise NotImplementedError
    if log_mode == 'False':
        options['logger'] = False
        options['checkpoint_callback'] = False
        options['early_stop_callback'] = False
    elif log_mode == 'wandb':
        wandb_logger = WandbLogger(
            project='lasaft_exp',
            tags=options['model'],
            offline=False,
            name=options['run_id'] + '_eval_' +
            options['epoch'].replace('=', '_'))
        options['logger'] = wandb_logger
        wandb_logger.log_hyperparams(model.hparams)
        wandb_logger.watch(model, log='all')
    else:
        options['logger'] = True  # default
        default_save_path = 'etc/lightning_logs'
        mkdir_if_not_exists(default_save_path)

    # ---- trainer arguments ----------------------------------------------
    if isinstance(options['gpus'], int) and options['gpus'] > 1:
        warn(
            '# gpu and num_workers should be 1, Not implemented: museval for distributed parallel'
        )
        options['gpus'] = 1
        options['distributed_backend'] = None

    # Forward only the options that Trainer.__init__ actually accepts.
    accepted = inspect.signature(Trainer.__init__).parameters
    trainer_kwargs = {
        name: options[name] for name in accepted if name in options}

    # ---- dataset --------------------------------------------------------
    dataset_keys = ('musdb_root', 'batch_size', 'num_workers', 'pin_memory',
                    'num_frame', 'hop_length', 'n_fft')
    dataset_args = {key: options[key] for key in dataset_keys}
    dp = DataProvider(**dataset_args)

    # ---- run ------------------------------------------------------------
    trainer_kwargs['precision'] = 32
    trainer = Trainer(**trainer_kwargs)
    _, test_data_loader = dp.get_test_dataset_and_loader()
    model = model.load_from_checkpoint(ckpt_path)

    trainer.test(model, test_data_loader)

    return None
コード例 #29
0
ファイル: MPCS.py プロジェクト: dossa328/MPCS
                        distance[input_vertex[i]],
                        distance[priority_Queue[0][0]] + int(
                            graph.get_cost(priority_Queue[0][0],
                                           input_vertex[i]))):
                    distance[input_vertex[i]] = min(
                        distance[input_vertex[i]],
                        distance[priority_Queue[0][0]] + int(
                            graph.get_cost(priority_Queue[0][0],
                                           input_vertex[i])))
                    priority_Queue.append([
                        input_vertex[i], distance[input_vertex[i]],
                        priority_Queue[0][0]
                    ])

        del priority_Queue[0]
        # priority_Queue.sort()
        priority_Queue = sorted(priority_Queue, key=lambda val: val[1])
    else:
        del priority_Queue[0]
        priority_Queue = sorted(priority_Queue, key=lambda val: val[1])

cost_matrix2 = c(graph.cost_matrix)
app_path = ap()
print app_path.out_dfs(in_start, in_end, input_vertex, cost_matrix2)
# print sorted(distance, key=lambda t: t[1])

p_distance = sorted(distance.items())

for i in range(len(p_distance)):
    print p_distance[i][1]
コード例 #30
0
ファイル: encoding_player.py プロジェクト: benzax/hanabi
 def CodeFromInfoMat(self,CurrentPlayer,NumInHand):
     """Build the code string a hinting player would use, from shared knowledge.

     For every combination of one card column per other player, the number
     of possibilities that would be eliminated by resolving those cards is
     computed, separately for numbers and suits. Candidates are ranked by
     that reduction (numbers before suits at equal reduction) and up to two
     are selected.

     Parameters
     ----------
     CurrentPlayer : int
         Index of the player giving the hint; excluded from the targets.
     NumInHand : sequence of int
         Number of cards held by each player; columns at or beyond a
         player's hand size are not eligible.

     Returns
     -------
     str
         Segments joined by ``'__'``; each segment is
         ``'4<N|S>_all_<player>,<col>:<player>,<col>...'``.

     Raises
     ------
     IndexError
         If no candidate column combination is eligible.
     """
     OtherPlayers = [p for p in range(self.nPlayers) if p != CurrentPlayer]
     HandNumOther = [NumInHand[p] for p in OtherPlayers]

     def CountPossibilities(Matrix,FullStr):
         # Number of comma separated possibilities left for every card;
         # the marker 'x' means nothing is known yet (all of FullStr).
         Counts = np.zeros([self.nPlayers,self.nCards])
         for i,Row in enumerate(Matrix):
             for j,Known in enumerate(Row):
                 if Known == 'x':
                     Known = FullStr
                 Counts[i,j] = len(Known.split(','))
         return Counts

     NumPosMat = CountPossibilities(self.InformationMatrix['NumMat'],
                                    self.NumStr)
     SuitPosMat = CountPossibilities(self.InformationMatrix['SuitMat'],
                                     self.SuitStr)

     # One column index per other player.
     CandidateIndices = list(it.product(range(self.nCards),
                                        repeat=self.nPlayers-1))
     ReductionListNum = []
     ReductionListSuit = []
     for Cols in CandidateIndices:
         ReductionListNum.append(int(sum(
             NumPosMat[OtherPlayers[j],J] - 1 for j,J in enumerate(Cols))))
         ReductionListSuit.append(int(sum(
             SuitPosMat[OtherPlayers[j],J] - 1 for j,J in enumerate(Cols))))
     NumSortInd = np.argsort(ReductionListNum)[::-1]
     SuitSortInd = np.argsort(ReductionListSuit)[::-1]

     MaxReduction = np.max([np.max(ReductionListNum),
                            np.max(ReductionListSuit)])
     CodeCandidateList = []
     for Level in range(int(MaxReduction),-1,-1):
         # Prioritize number resolution over suit resolution.
         # Each type is ranked by its OWN reduction list: the suit pass
         # previously tested ReductionListNum, which mis-ranked suit codes.
         for Tag,SortInd,Reductions in (('N',NumSortInd,ReductionListNum),
                                        ('S',SuitSortInd,ReductionListSuit)):
             for Ind in SortInd:
                 if Reductions[Ind] != Level:
                     continue
                 ColInd = list(CandidateIndices[Ind])
                 # Skip combinations pointing at a slot a player lacks.
                 if all(HandNumOther[k] > K for k,K in enumerate(ColInd)):
                     CodeCandidateList.append(Tag + ':' + str(ColInd)[1:-1])

     # Keep the best candidate plus the first later candidate that either
     # has the other type or shares no column position with the best one.
     First = CodeCandidateList[0]
     FirstCols = [int(k) for k in First.split(':')[1].split(',')]
     CodeSelection = [First]
     for Cand in CodeCandidateList:
         if Cand[0] != First[0]:
             CodeSelection.append(Cand)
             break
         ColInd = [int(k) for k in Cand.split(':')[1].split(',')]
         if not any(np.equal(FirstCols,ColInd)):
             CodeSelection.append(Cand)
             break

     Segments = []
     for Sel in CodeSelection:
         Cols = [int(K) for K in Sel.split(':')[1].split(',')]
         Targets = ':'.join(str(OtherPlayers[j]) + ',' + str(J)
                            for j,J in enumerate(Cols))
         Segments.append('4' + Sel[0] + '_all_' + Targets)
     return '__'.join(Segments)
コード例 #31
0
ファイル: encoding_player.py プロジェクト: benzax/hanabi
    def play(self, r):
        """Replay unseen history into the information matrix, then pick a move.

        The method first records a copy of the current hands in
        ``r.HandHistory`` and validates the game setup. It then walks
        ``r.playHistory`` and, for every entry newer than ``self.iRecord``,
        updates ``self.InformationMatrix``: hints are decoded through the
        shared code for that turn and combined with the directly hinted
        information; plays/discards shift the acting player's columns left.
        Finally an action for the current player is selected.

        Returns a 2-tuple ``(action, payload)`` where ``action`` is one of
        ``'hint'``, ``'play'``, ``'discard'`` or ``'resign'``.

        Raises NameError for unsupported game setups, unknown actions, or
        when a card's possibilities are reduced to nothing.
        """
        r.HandHistory.append(c(r.h))
        nPriorTurns = len(r.playHistory)
        # Preconditions of this AI: vanilla suits, only "Encoder<x>" players
        # (the last character of each name is ignored), exactly 5 players.
        if r.suits != 'rygbw':
            raise NameError('Encoding AI requires vanilla suits\n')
        for i in r.NameRecord:
            if i[:-1] != 'Encoder':
                raise NameError('Encoding AI must only play with other encoders')
        if r.nPlayers != 5:
            raise NameError('Encoding AI must play in a 5 player game')
            
        # (Re)initialize while at most nPlayers - 1 moves have been made.
        if nPriorTurns <= r.nPlayers - 1:
            self.InitializeConstants(r)

        # Process only the history entries not handled on a previous call.
        for i,I in enumerate(r.playHistory):            
            if i > self.iRecord:
                self.iRecord = c(i)
                # Make sure CodeList has a slot for this turn.
                if i == len(self.CodeList):
                    self.CodeList.append('')
                PlayingPlayer = (i % self.nPlayers) # Determines which player made this move
                if I[0] == 'hint':
                    GivenHint = list(I[1])
                    # No stored code for this turn: derive it from the
                    # information matrix and the hand sizes at that turn.
                    if self.CodeList[i] == '':
                        NumInHand =  [len(K.cards) for K in r.HandHistory[i]]
                        self.CodeList[i] = self.CodeFromInfoMat(PlayingPlayer,NumInHand)
                    Code = self.CodeList[i]
                    EncodedValue = self.BackOutEncodedValue(self.EncodingTables[PlayingPlayer],GivenHint)
                    # Decode once per player other than the hinting one,
                    # excluding that player's and the hinter's hands.
                    for j in [k for k in range(self.nPlayers) if k != PlayingPlayer]:
                        RestrictedDenseOtherHands = self.CompleteHandToInt(r,[j,PlayingPlayer],i)
                        self.ValueFromCode(Code,RestrictedDenseOtherHands,EncodedValue,j)
                    
                    # GivenHint[0] indexes the hinted hand; GivenHint[1] is
                    # the hinted number or suit character.
                    if GivenHint[1] in '12345':
                        HintType = 'N'
                    elif GivenHint[1] in r.suits:
                        HintType = 'S'
                    else:
                        raise NameError('')
                        
                    MatLabel = {'N':'NumMat','S':'SuitMat'}[HintType]
                    for j,J in enumerate(r.HandHistory[i][GivenHint[0]].cards):
                        
                        PriorKnowledge = c(self.InformationMatrix[MatLabel][GivenHint[0],j])
                        # 'x' marks a card about which nothing is known yet.
                        if PriorKnowledge == 'x':
                            if HintType == 'N':
                                PriorKnowledge = self.NumStr
                            else:
                                PriorKnowledge = self.SuitStr
                        else:
                            if HintType == 'S':
                                if len(PriorKnowledge) == 1:
                                    # NOTE(review): str() of a list of strings keeps
                                    # the quote characters (e.g. "'r'") - confirm
                                    # that this is the intended representation.
                                    PriorKnowledge = str([r.suits[int(k)] for k in PriorKnowledge.split(',')])[1:-1]
    
                        DirectSet = list(set(PriorKnowledge.split(',')).intersection(J['direct']))
                        if len(DirectSet) == 1:
                            # NOTE(review): this `pass` has no effect.
                            pass
                            if HintType == 'S':
                                # Suits are stored as their index within r.suits.
                                self.InformationMatrix[MatLabel][GivenHint[0],j] = c([str(k) for k,K in enumerate(r.suits) if K == DirectSet[0]][0])
                            else:
                                self.InformationMatrix[MatLabel][GivenHint[0],j] = c(DirectSet[0])
                        else:
                            # Values recorded only under 'indirect' (not under
                            # 'direct') are removed from the possibilities.
                            StrictIndirect = set(J['indirect']) - set(J['direct'])
                            IndirectSet = set(PriorKnowledge.split(',')) - StrictIndirect
                            
                            #Currently only use indirect method for numeric hints
                            if HintType == 'N':
                                self.InformationMatrix[MatLabel][GivenHint[0],j] = c(str([int(k) for k in IndirectSet])[1:-1])
                        
                        if len(self.InformationMatrix[MatLabel][GivenHint[0],j]) == 0:
                            raise NameError('Error: Possibility has been reduced to empty set')      
                      
                    self.CheckEncoding(r,i+1)
                elif I[0] == 'play' or I[0] == 'discard':
                    # Shift the knowledge of the cards to the right of the
                    # dropped slot one column left and mark the last column
                    # as unknown.
                    self.RunningPlayInd += 1                
                    for key in self.InformationMatrix:
                        for j in range(r.DropIndRecord[self.RunningPlayInd]+1,self.nCards):                    
                            self.InformationMatrix[key][PlayingPlayer,j-1] = (
                                c(self.InformationMatrix[key][PlayingPlayer,j]))
    
                        self.InformationMatrix[key][PlayingPlayer,-1] = 'x'
                    
                    self.CheckEncoding(r,i+1)
                else:
                    raise NameError('Unknown action')
            
        
        cards = r.h[r.whoseTurn].cards # don't look!
        DenseOtherHands = self.CompleteHandToInt(r,[self.SelfID])

        # The first 4 turns are hard coded
        if nPriorTurns < 4:
            Code = self.CodeList[nPriorTurns]
            EncodedValue,ResultList = self.InterpretCode(Code,DenseOtherHands)
            Hint = self.EncodingTables[self.SelfID][EncodedValue]
            return 'hint', (Hint[0],Hint[1])
        
        # Decision order: play, then discard, then hint, else resign.
        # GetPlayInd returns -1 when there is nothing to play.
        PlayInd = self.GetPlayInd(r.progress)
        if PlayInd != -1:
            return 'play',cards[PlayInd]
            
        # Discard only while fewer than nPlayers - 1 hints are available.
        DiscardList = self.GetDiscardable(r)
        if len(DiscardList) > 0 and r.hints < self.nPlayers-1:
            return 'discard',cards[DiscardList[0]]
        
        if r.hints > 0:
            NumInHand = [len(K.cards) for K in r.h]
            NewCode = self.CodeFromInfoMat(self.SelfID,NumInHand)    
            EncodedValue,ResultList = self.InterpretCode(NewCode,DenseOtherHands)
            Hint = self.EncodingTables[self.SelfID][EncodedValue]
            return 'hint', (Hint[0],Hint[1])
        
        return 'resign', ''
コード例 #32
0
ファイル: GeneralEncoding.py プロジェクト: robieta/hanabi
 def EvaluateCode(self,OtherIDs,Code,progress):
     """Score a candidate code by the uncertainty it is expected to remove.

     The score is the sum, over every (player, column) the code points at,
     of a degree-of-freedom (DoF) reduction. Numeric reductions are a
     weighted average over the values the card may still have; suit
     reductions are simply "possibilities minus one".

     OtherIDs -- player ids, in the order used by the column lists in Code
     Code     -- '__' separated segments of the form TYPE_COLS_GROUPSETS
     progress -- forwarded to self.GetPDO to adjust the value weights
     """
     # Base weights: naively the number of copies of each card number.
     Weights = [3,2,2,2,1]
     Doubled,Halved,_Unused = self.GetPDO(progress)
     for Number in Doubled:
         Weights[Number-1] *= 2.
     for Number in Halved:
         Weights[Number-1] /= 2.

     Fields = [Segment.split('_') for Segment in Code.split('__')]
     TypeList = [F[0] for F in Fields]
     ColList = [F[1] for F in Fields]
     GroupSetList = [F[2] for F in Fields]

     def ColumnsPerPlayer(Rows):
         # Transpose: one list per player holding the (integer) column
         # each segment points at.
         return [[int(Row[p]) for Row in Rows] for p in range(len(Rows[0]))]

     DoFReductionList = []

     # ---- numeric segments ------------------------------------------
     NumIndex = [n for n,T in enumerate(TypeList) if T == 'N']
     if NumIndex:
         NumColList = [ColList[n].split(',') for n in NumIndex]
         # The group sets are stored as text and turned into lists here.
         CodeNumSets = [eval(GroupSetList[n]) for n in NumIndex]
         PlayerCols = ColumnsPerPlayer(NumColList)
         # Distinct columns to inspect in each player's row.
         ColCheckList = [list(set(Cols)) for Cols in PlayerCols]

         for PlayerPos,Distinct in enumerate(ColCheckList):
             for Col in Distinct:
                 Candidates = [int(v) for v in
                               self.InformationMatrix[OtherIDs[PlayerPos],Col,'N']]
                 CandWeights = [Weights[v-1] for v in Candidates]
                 Remaining = []
                 for Value in Candidates:
                     # If the card really were Value, every segment that
                     # targets this column would narrow it to Value's group.
                     Restricted = set(Candidates)
                     for k,Target in enumerate(PlayerCols[PlayerPos]):
                         if Target != Col:
                             continue
                         Group = [g for g in CodeNumSets[k] if Value in g][0]
                         Restricted = Restricted.intersection(Group)
                     Remaining.append(len(Restricted))
                 Products = [w*n for w,n in zip(CandWeights,Remaining)]
                 Expected = 1./np.sum(CandWeights)*np.sum(Products)
                 DoFReductionList.append(len(Candidates) - Expected)

     # ---- suit segments ---------------------------------------------
     SuitIndex = [n for n,T in enumerate(TypeList) if T == 'S']
     if SuitIndex:
         SuitColList = [ColList[n].split(',') for n in SuitIndex]
         ColCheckList = [list(set(Cols))
                         for Cols in ColumnsPerPlayer(SuitColList)]
         for PlayerPos,Distinct in enumerate(ColCheckList):
             for Col in Distinct:
                 Known = list(self.InformationMatrix[OtherIDs[PlayerPos],Col,'S'])
                 DoFReductionList.append(len(Known) - 1)
     return np.sum(DoFReductionList)
コード例 #33
0
ファイル: GeneralEncoding.py プロジェクト: robieta/hanabi
    def GenerateCode(self,TurnNumber,HintingPlayer,CardNumberGroups,progress):
        """Generate candidate codes with the shared seed and return the best.

        Candidates are drawn pseudo-randomly (common-seed Monte Carlo, so
        every player reproduces the same draws) and scored with
        self.EvaluateCode; the first candidate with the highest positive
        score wins, otherwise the first candidate is returned.

        TurnNumber       -- selects the seed and the in-play record to use
        HintingPlayer    -- id of the player giving the hint
        CardNumberGroups -- candidate groupings of the card numbers
        progress         -- forwarded to self.EvaluateCode
        """
        OtherIDs = [pid for pid in range(self.nPlayers) if pid != HintingPlayer]
        self.StartRandom(self.RandomSeedList[TurnNumber])
        SuitSetStr = '[' + ','.join('[' + suit + ']'
                                    for suit in self.SuitSet) + ']'

        # BaseSets[b-1] lists the labels of the groupings that need base b;
        # the trivial suit grouping needs base 5.
        BaseSets = [[] for _ in range(5)]
        BaseSets[4].append('0S')
        for GroupInd,Group in enumerate(CardNumberGroups):
            BaseSets[len(Group)-1].append(str(GroupInd)+'N')

        # Every way of filling a base combination with available labels.
        ValidCombinations = []
        for Combo in self.NumSetCombo:
            PreProduct = [BaseSets[Base-1] for Base in Combo
                          if BaseSets[Base-1]]
            ValidCombinations.extend(
                Choice for Choice in it.product(*PreProduct) if Choice)

        nMCPerValidCombo = int(self.nMCCandidates/len(ValidCombinations))

        CodeCandidateList = []
        for Combination in ValidCombinations:
            for _ in range(nMCPerValidCombo):
                Segments = []
                for Label in Combination:
                    CodeType = Label[-1]
                    # One random draw per segment; the draw order must stay
                    # fixed so that all players generate identical codes.
                    RowChoice = random.randint(
                        0,self.ColumnCombinations.shape[0]-1)
                    Cols = c(self.ColumnCombinations[RowChoice,:]).tolist()
                    ColInPlay = [self.InPlay[TurnNumber][pid,Cols[pos]]
                                 for pos,pid in enumerate(OtherIDs)]
                    # Columns that are not in play are redirected to -1.
                    for pos,Present in enumerate(ColInPlay):
                        if not Present:
                            Cols[pos] = -1
                    ColText = str(Cols)[1:-1].replace(' ','')
                    if CodeType == 'S':
                        GroupText = SuitSetStr
                    else:
                        GroupText = str(
                            CardNumberGroups[int(Label[:-1])]).replace(' ','')
                    Segments.append(CodeType + '_' + ColText + '_' + GroupText)
                CodeCandidateList.append('__'.join(Segments))

        BestCode = CodeCandidateList[0]
        BestReduction = 0
        for Candidate in CodeCandidateList:
            Reduction = self.EvaluateCode(OtherIDs,Candidate,progress)
            if Reduction > BestReduction:
                BestReduction,BestCode = Reduction,Candidate

        self.EndRandom()
        return BestCode
コード例 #34
0
ファイル: Atari (DQN).py プロジェクト: nemo-algorithm/AI
 def start(self, x):
     """Fill all five history slots with independent copies of frame x."""
     for slot in range(5):
         frame = c(x)
         self.history[slot] = frame
コード例 #35
0
ファイル: Atari (DQN).py プロジェクト: nemo-algorithm/AI
 def update(self, x):
     """Drop the oldest frame, shift the rest forward and append frame x."""
     # Copy the four newest frames first so the shifted write does not
     # read from memory it is overwriting.  TODO maybe bottleneck
     newest_four = c(self.history[1:5, :, :])
     self.history[0:4, :, :] = newest_four
     self.history[4] = c(x)
コード例 #36
0
ファイル: Atari (DQN).py プロジェクト: nemo-algorithm/AI
 def __init__(self, state, action, reward, done):
     """Store one transition; state and reward are kept as float16."""
     # Half precision keeps the stored record small.
     self.state, self.reward = np.float16(state), np.float16(reward)
     # state is expected to be a 5 by 84 by 84 np array
     self.action, self.done = c(action), c(done)
コード例 #37
0
ファイル: GeneralEncoding.py プロジェクト: robieta/hanabi
 def UpdateInfoMat(self,r):
     """Bring self.InformationMatrix up to date with the most recent moves.

     Re-evaluates the last (at most self.nPlayers) entries of
     r.playHistory. Hints are decoded by regenerating the code the hinting
     player must have used; plays and discards shift that player's columns.
     self.CheckInfoMat(r) is called at the end in every case.

     Raises NameError for a play type other than 'hint', 'play' or
     'discard'.
     """
     # This is the heart of the encoding scheme. It takes the hints that 
     # have been given and inverts the encoding to determine what has been
     # transmitted.
     CurrentTurn = len(r.playHistory)
     if CurrentTurn > 0:
         # Only the moves made since this player's previous turn.
         FirstEvalTurn = np.max([CurrentTurn - self.nPlayers,0])
         TurnEvalRange = range(FirstEvalTurn,CurrentTurn)
         for Turn in TurnEvalRange:
             PlayType = r.playHistory[Turn][0]
             # Players act in fixed rotation, so the turn index gives the
             # acting player.
             CurrentPlayer = Turn % self.nPlayers
             if PlayType == 'hint':
                 HintingPlayer = CurrentPlayer
                 
                 # Back out the dynamic code chosen by the hinting player
                 Code = self.GenerateCode(Turn,HintingPlayer,
                             self.GroupCardNumbers(r.progressHistory[Turn]),
                             r.progressHistory[Turn])
                 self.UpdateInformationMatrix(r.playHistory[Turn][1],Code,
                      HintingPlayer,Turn)
                 for i in range(self.nPlayers):
                     for j in range(self.nCards):
                         # Use the actual hinted information in addition to
                         # the encoded information
                         for k in self.DirectRecord[Turn][i,j]:
                             # NOTE(review): k itself is stored here, while
                             # other entries are lists - confirm that set(k)
                             # below is intended to work on this value.
                             if k in self.NumberSet:
                                 self.InformationMatrix[i,j,'N'] = k
                             else:
                                 self.InformationMatrix[i,j,'S'] = k
                         # Remove everything the indirect record rules out.
                         self.InformationMatrix[i,j,'N'] = list(set(
                             self.InformationMatrix[i,j,'N']).difference(
                             self.IndirectRecord[Turn][i,j]))
                         self.InformationMatrix[i,j,'S'] = list(set(
                             self.InformationMatrix[i,j,'S']).difference(
                             self.IndirectRecord[Turn][i,j]))
                             
                         # Use card counting methods to further restrict
                         # possibilities.
                         # Repeats until CardCountInfoMat returns a falsy
                         # value.
                         # NOTE(review): this loop runs inside the i/j loops,
                         # i.e. once per card - confirm that is intended.
                         Improvement = True
                         while Improvement:
                             Improvement = self.CardCountInfoMat(r,Turn)
             elif PlayType == 'play' or PlayType == 'discard':
                 # Shift cards to the left and initialize the rightmost
                 # card as unknown: [1,2,3,4,5]['r','y','g','b','w']
                 # If there is no card (endgame) this initialization is
                 # incorrect; however anything that points to that slot gets
                 # redirected to the dummy r1 at the -1 position.
                 self.RunningPlayInd += 1  
                 DroppedCardInd = r.DropIndRecord[self.RunningPlayInd]
                 for j in range(DroppedCardInd,self.nCards-1):
                     self.InformationMatrix[CurrentPlayer,j,'N'] = c(
                             self.InformationMatrix[CurrentPlayer,j+1,'N'])
                     self.InformationMatrix[CurrentPlayer,j,'S'] = c(
                             self.InformationMatrix[CurrentPlayer,j+1,'S'])
                 self.InformationMatrix[CurrentPlayer,self.nCards-1,'N'] = (
                         c(self.NumberSet))
                 self.InformationMatrix[CurrentPlayer,self.nCards-1,'S'] = (
                         c(self.SuitSet))
             else:
                 raise NameError('Still to be implemented')
     # Raise exception if a mistake is made
     self.CheckInfoMat(r)
コード例 #38
0
ファイル: GeneralEncoding.py プロジェクト: robieta/hanabi
    def InitializeConstants(self,r):
        """Initialize per-game constants and the starting information matrix.

        Reads the game layout from ``r`` (``nPlayers``, ``h``, ``whoseTurn``,
        ``CommonSeed``) and sets, among others, ``nMCCandidates``,
        ``nPlayers``, ``nCards``, ``SelfID``, ``OtherIDs``, ``SuitSet``,
        ``NumberSet``, ``SortedDeck``, ``InformationMatrix``,
        ``RandomSeedList``, ``MaxCardNumber``, ``NumSetCombo`` and
        ``RunningPlayInd``. ``self.StaticCombinatorics()`` is run once, the
        first time ``self.Initialized`` is falsy.

        Raises NameError when the number of players is not 2 to 5.
        """
        # --- Monte Carlo constants ---
        # The combinatorics are such that complete enumeration is impractical.
        # Instead, combinations are pseudo randomly selected and tested. Larger
        # sampling will produce better results at the cost of longer run times
        # (builtin float: the np.float alias was removed in NumPy 1.24)
        self.nMCCandidates = float(1e2)

        # This block initializes constants which depend on game specifics
        self.nPlayers = r.nPlayers
        self.nCards = len(r.h[r.whoseTurn].cards)

        if not self.Initialized:
            self.Initialized = True
            self.StaticCombinatorics()

        self.SelfID = r.whoseTurn
        self.OtherIDs = [i for i in range(r.nPlayers) if i != self.SelfID]

        self.SuitSet = ['r','y','g','b','w']
        self.NumberSet = [str(i+1) for i in range(5)]

        # Every card of the deck as "<number><suit>", grouped by suit.
        self.SortedDeck = [number + suit
                           for suit in self.SuitSet
                           for number in '1112233445']

        # I choose to represent the information matrix as a dictionary because
        # it makes it easier to retrieve elements. The tradeoff is that the
        # intrinsic structure is not contained in the variable. However, the
        # shape is always nPlayers x nCards, so this is acceptable
        self.InformationMatrix = {}
        for i in range(self.nPlayers):
            # "Dummy" r1 at the -1 position can be pointed to during encoding
            # if the original target is not present (less than full hand size)
            # See self.GenerateHandRecord for more information
            # NOTE(review): the dummy number is the int 1 while NumberSet
            # holds strings - confirm this difference is intended.
            self.InformationMatrix[i,-1,'S'] = ['r']
            self.InformationMatrix[i,-1,'N'] = [1]
            for j in range(self.nCards):
                self.InformationMatrix[i,j,'S'] = c(self.SuitSet)
                self.InformationMatrix[i,j,'N'] = c(self.NumberSet)

        # I plan on using random sampling methods to study different play
        # strategies. However, introduction of a full CSPRNG would
        # desynchronize the players. Instead, I use a shared fixed seed so all
        # players can access the same list of pseudo random numbers.
        # sys.maxint only exists on Python 2; fall back to sys.maxsize so the
        # same code also runs on Python 3.
        MaxSeed = getattr(sys,'maxint',sys.maxsize)
        self.StartRandom(r.CommonSeed)
        self.RandomSeedList = [random.randint(1,MaxSeed) for i in
                               range(100)]
        self.EndRandom()

        # added to avoid "magic numbers"
        self.MaxCardNumber = np.max([int(i) for i in self.NumberSet])

        # The encoding AI considers transmitting encoded subsets to more
        # efficiently satisfy the integer constrained nature of bits
        if self.nPlayers == 2:
            self.NumSetCombo = [[2,5],[3,3]]
        elif self.nPlayers == 3:
            self.NumSetCombo =[[2,2,5],[4,5],[2,3,3]]
        elif self.nPlayers == 4:
            self.NumSetCombo = [[2,3,5],[3,3,3],[5,5],[2,2,2,3]]
        elif self.nPlayers == 5:
            self.NumSetCombo = [[2,2,2,5],[2,4,5],[2,2,3,3],[3,3,4],
                                [2,2,2,2,2],[5,5]]
        else:
            raise NameError('Invalid number of players for this AI')

        # Misc. Values
        self.RunningPlayInd = -1
コード例 #39
0
ファイル: Atari (DQN).py プロジェクト: nemo-algorithm/AI
def train(env,
          episodes,
          learning_rate=0.0001,
          epsilon=1.0,
          gamma=0.99,
          min_epsilon=0.05,
          epsilon_step=1e-6,
          reset=False,
          replay_capacity=100000):
    """Train a DQN with a main and a target network on ``env``.

    Args:
        env: environment exposing ``reset()`` and ``step(action)``.
        episodes: number of episodes to run.
        learning_rate: RMSprop learning rate.
        epsilon: initial exploration probability.
        gamma: discount factor used in the bootstrapped target.
        min_epsilon: lower bound for ``epsilon``.
        epsilon_step: amount ``epsilon`` is reduced by after every action.
        reset: when false, try to resume from the files
            ``'<run_name>_cnn.pkl'`` and ``'<run_name>_optimizer'``.
        replay_capacity: capacity handed to ``REPLAY_MEMORY``; learning
            starts once half that many steps have been taken.

    Returns:
        Tuple ``(main_cnn, reward_history)``: the trained network and the
        list of per-episode total rewards.
    """
    main_cnn = CNN((Height, Width, 4), 4).to(device)  # Q initialization
    target_cnn = CNN((Height, Width, 4), 4).to(device)
    target_cnn.load_state_dict(main_cnn.state_dict())
    target_cnn.eval()

    criterion = nn.MSELoss().to(device)
    optimizer = torch.optim.RMSprop(main_cnn.parameters(), lr=learning_rate)

    if not reset:
        # Best-effort resume: a missing or unreadable checkpoint must not
        # abort training, but KeyboardInterrupt/SystemExit still propagate.
        try:
            main_cnn.load_state_dict(torch.load('{}_cnn.pkl'.format(run_name)))
            target_cnn.load_state_dict(main_cnn.state_dict())
            optimizer.load_state_dict(
                torch.load('{}_optimizer'.format(run_name)))
        except Exception as err:
            print('Could not resume from checkpoint: {}'.format(err))

    wandb.watch(main_cnn)

    step = 0
    history = HISTORY(Height, Width)
    replay_memory = REPLAY_MEMORY(replay_capacity)

    reward_history = []
    count_action_history = []

    for episode in tqdm(range(episodes)):

        state = env.reset()
        state = preprocessing(state)
        history.start(state)
        reward = 0

        count_action = [0, 0, 0, 0]

        while True:

            # The network input is every frame of the history but the oldest.
            state = c(history.history[1:])

            # Choose Action (epsilon-greedy)
            if np.random.random() < 1 - epsilon:
                action = target_cnn(tensor(state)).to("cpu")
                action = torch.argmax(action).item()
            else:
                action = np.random.randint(0, 4)

            count_action[action] += 1
            epsilon = max(min_epsilon, epsilon - epsilon_step)

            # Step
            step += 1
            state_next, reward_step, done, info = env.step(action)
            state_next = preprocessing(state_next)
            history.update(state_next)

            reward += reward_step
            replay_memory.update(
                DATA(history.history, action, reward_step, int(done)))

            # Learn every 4th step once the replay memory is half full.
            if step >= replay_capacity // 2 and step % 4 == 0:

                main_cnn.train()
                states, actions, rewards_step, states_next, dones = replay_memory.sample(
                    128)

                states = torch.from_numpy(states).float().to(device)
                actions = torch.from_numpy(actions).to(device)
                rewards_step = torch.from_numpy(rewards_step).float().to(
                    device)
                states_next = torch.from_numpy(states_next).float().to(device)
                dones = torch.from_numpy(dones).float().to(device)

                Q_main = torch.sum(main_cnn(states) * F.one_hot(actions, 4),
                                   dim=-1)  # main: for training

                # Bootstrapped target; terminal transitions get no future
                # value because of the (1 - dones) factor.
                with torch.no_grad():
                    Q_target = rewards_step + (1 - dones) * gamma * torch.max(
                        target_cnn(states_next), dim=-1)[0].detach()

                optimizer.zero_grad()

                loss = criterion(Q_main, Q_target)
                loss.backward()
                optimizer.step()

            # Periodically synchronize the target network.
            if step % 10000 == 0:
                target_cnn.load_state_dict(main_cnn.state_dict())

            if done:
                break

        if episode % 100 == 0:

            # display.clear_output()
            print(step, episode)
            plt.title("reward_history, episode: {} epsilon: {}".format(
                episode, epsilon))
            plt.plot(reward_history)
            plt.show()

        reward_history.append(reward)
        count_action_history.append(count_action)
        wandb.log({
            "Reward": reward,
            "episode": episode,
            "epsilon": epsilon,
            "step": step
        })

        if episode % 1000 == 0:

            torch.save(target_cnn.state_dict(), '{}_cnn.pkl'.format(run_name))
            # Use the file name the resume code above loads; writing the
            # optimizer state to '{}_cnn.pkl' overwrote the model weights.
            torch.save(optimizer.state_dict(),
                       '{}_optimizer'.format(run_name))
            torch.save(
                target_cnn.state_dict(),
                os.path.join(wandb.run.dir, '{}_model.pt'.format(run_name)))
            torch.save(
                optimizer.state_dict(),
                os.path.join(wandb.run.dir,
                             '{}_optimizer.pt'.format(run_name)))

    # `cnn` was never defined in this function; return the trained network.
    return main_cnn, reward_history
コード例 #40
0
ファイル: Atari (DQN).py プロジェクト: nemo-algorithm/AI
    def __init__(self, capacity):

        self.replay = []
        self.capacity = c(capacity)
        self.time = 0